In [1]:
#Necessary Imports

from IPython.display import display
from kmodes.kmodes import KModes
from scipy.spatial.distance import cdist
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import KMeans
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_curve
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.figure_factory as ff
import scipy.cluster.hierarchy as sch
import seaborn as sns
plt.style.use('bmh')


import warnings
warnings.filterwarnings("ignore")

#To make the dataframe to fully display and not wrap around
pd.set_option('display.expand_frame_repr', False)

Data Loading

In [2]:
df_9= pd.read_excel("DDS9_Data_Extract_with_labels.xlsx")
df_10=pd.read_excel("DDS10_Data_Extract_with_labels.xlsx")
df_11=pd.read_excel("DDS11_Data_Extract_with_labels.xlsx")

#deleting the unrelated columns - Record Number
df_9.drop(df_9.columns[0],axis=1,inplace=True)
df_10.drop(df_10.columns[0],axis=1,inplace=True)
df_11.drop(df_11.columns[0],axis=1,inplace=True)

#printing shapes for all dataframes
r1,c1=df_9.shape
r2,c2=df_10.shape
r3,c3=df_11.shape
print("there are ",r1,'rows and ',c1,' columns in 2009 dataset')
print("there are ",r2,'rows and ',c2,' columns in 2010 dataset')
print("there are ",r3,'rows and ',c3,' columns in 2011 dataset')
there are  2076 rows and  190  columns in 2009 dataset
there are  2205 rows and  196  columns in 2010 dataset
there are  2131 rows and  197  columns in 2011 dataset
In [3]:
#since the column names are large, saving them in numpy arrays
column_names_9=df_9.columns.to_numpy()
column_names_10=df_10.columns.to_numpy()
column_names_11=df_11.columns.to_numpy()

labels=['Col'+str(x) for x in range(1,c1+1)]
df_9.columns=labels
labels=['Col'+str(x) for x in range(1,c2+1)]
df_10.columns=labels
labels=['Col'+str(x) for x in range(1,c3+1)]
df_11.columns=labels

Data Pre-processing

In [4]:
#printing column names in dataframes having null values
print("Columns containing null values in 2009 dataset:")
print(df_9.columns[df_9.isna().any()])

print("Columns containing null values in 2010 dataset:")
print(df_10.columns[df_10.isna().any()])

print("Columns containing null values in 2011 dataset:")
print(df_11.columns[df_11.isna().any()])
Columns containing null values in 2009 dataset:
Index(['Col9', 'Col10', 'Col11', 'Col12', 'Col13', 'Col14', 'Col15', 'Col63',
       'Col64', 'Col65', 'Col66', 'Col67', 'Col68', 'Col69', 'Col70', 'Col71',
       'Col72', 'Col73', 'Col74', 'Col75', 'Col76', 'Col77', 'Col78', 'Col79',
       'Col80', 'Col81', 'Col82', 'Col83', 'Col84', 'Col97', 'Col98', 'Col99',
       'Col100', 'Col101', 'Col102', 'Col103', 'Col104', 'Col105', 'Col106',
       'Col107', 'Col108', 'Col109', 'Col110', 'Col111', 'Col112', 'Col113',
       'Col114', 'Col115', 'Col116', 'Col117', 'Col118', 'Col119', 'Col120',
       'Col121', 'Col122', 'Col123', 'Col124', 'Col125', 'Col126', 'Col139',
       'Col140', 'Col141', 'Col142', 'Col143', 'Col144', 'Col145', 'Col146',
       'Col147', 'Col148', 'Col149', 'Col150', 'Col151', 'Col152', 'Col153',
       'Col154', 'Col155', 'Col156', 'Col157', 'Col158', 'Col159', 'Col173',
       'Col174', 'Col175', 'Col176', 'Col177', 'Col178', 'Col179', 'Col180',
       'Col181', 'Col182', 'Col183', 'Col184'],
      dtype='object')
Columns containing null values in 2010 dataset:
Index(['Col9', 'Col10', 'Col11', 'Col12', 'Col13', 'Col14', 'Col15', 'Col59',
       'Col60', 'Col61', 'Col62', 'Col63', 'Col64', 'Col65', 'Col66', 'Col67',
       'Col68', 'Col69', 'Col70', 'Col71', 'Col72', 'Col73', 'Col74', 'Col75',
       'Col76', 'Col77', 'Col78', 'Col91', 'Col92', 'Col93', 'Col94', 'Col95',
       'Col96', 'Col97', 'Col98', 'Col99', 'Col100', 'Col101', 'Col102',
       'Col103', 'Col104', 'Col105', 'Col106', 'Col107', 'Col108', 'Col109',
       'Col110', 'Col111', 'Col112', 'Col113', 'Col114', 'Col115', 'Col116',
       'Col117', 'Col118', 'Col119', 'Col120', 'Col121', 'Col122', 'Col123',
       'Col124', 'Col125', 'Col126', 'Col127', 'Col140', 'Col141', 'Col142',
       'Col143', 'Col144', 'Col145', 'Col146', 'Col147', 'Col148', 'Col149',
       'Col150', 'Col151', 'Col152', 'Col153', 'Col154', 'Col155', 'Col156',
       'Col157', 'Col158', 'Col159', 'Col160'],
      dtype='object')
Columns containing null values in 2011 dataset:
Index(['Col9', 'Col10', 'Col11', 'Col12', 'Col13', 'Col14', 'Col15', 'Col59',
       'Col60', 'Col61', 'Col62', 'Col63', 'Col64', 'Col65', 'Col66', 'Col67',
       'Col68', 'Col69', 'Col70', 'Col71', 'Col72', 'Col73', 'Col74', 'Col75',
       'Col76', 'Col77', 'Col78', 'Col91', 'Col92', 'Col93', 'Col94', 'Col95',
       'Col96', 'Col97', 'Col98', 'Col99', 'Col100', 'Col101', 'Col102',
       'Col103', 'Col104', 'Col105', 'Col106', 'Col107', 'Col108', 'Col109',
       'Col110', 'Col111', 'Col112', 'Col113', 'Col114', 'Col115', 'Col116',
       'Col117', 'Col118', 'Col119', 'Col120', 'Col121', 'Col122', 'Col123',
       'Col124', 'Col125', 'Col126', 'Col127', 'Col140', 'Col141', 'Col142',
       'Col143', 'Col144', 'Col145', 'Col146', 'Col147', 'Col148', 'Col149',
       'Col150', 'Col151', 'Col152', 'Col153', 'Col154', 'Col155', 'Col156',
       'Col157', 'Col158', 'Col159', 'Col160', 'Col174'],
      dtype='object')
In [5]:
nulls = df_9.isnull().sum()#.sort_values(ascending=False)
nulls.plot(kind='bar', figsize=(60, 10))
Out[5]:
<matplotlib.axes._subplots.AxesSubplot at 0x16d04d1d240>
In [6]:
nulls = df_10.isnull().sum()#.sort_values(ascending=False)
nulls.plot(
    kind='bar', figsize=(60, 10))
Out[6]:
<matplotlib.axes._subplots.AxesSubplot at 0x16d07563358>
In [7]:
nulls = df_11.isnull().sum()#.sort_values(ascending=False)
nulls.plot(
    kind='bar', figsize=(60, 10))
Out[7]:
<matplotlib.axes._subplots.AxesSubplot at 0x16d06ab3f28>

As we can see from above graphs,the dataset has similar columns having null values across the years. The dataset shows high amount of similarity.

In [8]:
a=df_9.isnull().sum().to_frame()
b=df_10.isnull().sum().to_frame()
c=df_11.isnull().sum().to_frame()
a[a[0]>0]

#replacing NaN's with -9999
df_9.fillna(-9999,inplace=True)
df_10.fillna(-9999,inplace=True)
df_11.fillna(-9999,inplace=True)
In [9]:
print("Before processing:")
print(df_9.shape)
print(df_10.shape)
print(df_11.shape)
print('\n')

#dropping all rows where the Age is greater than 100
df_9.drop( df_9[df_9['Col1'] >= 100].index , inplace=True)
df_10.drop( df_10[df_10['Col1'] >= 100].index , inplace=True)
df_11.drop( df_11[df_11['Col1'] >= 100].index , inplace=True)


#dropping all rows where the Age is less than 10
df_9.drop( df_9[df_9['Col1'] <= 10].index , inplace=True)
df_10.drop( df_10[df_10['Col1'] <= 10].index , inplace=True)
df_11.drop( df_11[df_11['Col1'] <= 10].index , inplace=True)


#dropping all rows where the Income is unknown
df_9.drop( df_9[df_9['Col16'] == 'Do not know'].index , inplace=True)
df_10.drop( df_10[df_10['Col16'] == 'Do not know'].index , inplace=True)
df_11.drop( df_11[df_11['Col16'] == 'Do not know'].index , inplace=True)
print("After processing:")
print(df_9.shape)
print(df_10.shape)
print(df_11.shape)
Before processing:
(2076, 190)
(2205, 196)
(2131, 197)


After processing:
(1900, 190)
(2035, 196)
(2011, 197)
In [10]:
def plotPerColumnDistribution(df, nGraphShown, nGraphPerRow):
    nunique = df.nunique()
    df = df[[col for col in df if nunique[col] > 1 and nunique[col] < 50]] 
    nRow, nCol = df.shape
    columnNames = list(df)
    nGraphRow = (nCol + nGraphPerRow - 1) / nGraphPerRow
    plt.figure(num = None, figsize = (6 * nGraphPerRow, 8 * nGraphRow), dpi = 80, facecolor = 'w', edgecolor = 'k')
    for i in range(min(nCol, nGraphShown)):
        plt.subplot(nGraphRow, nGraphPerRow, i + 1)
        columnDf = df.iloc[:, i]
        if (not np.issubdtype(type(columnDf.iloc[0]), np.number)):
            valueCounts = columnDf.value_counts()
            valueCounts.plot.bar()
        else:
            columnDf.hist()
        plt.ylabel('counts')
        plt.xticks(rotation = 90)
        plt.title(f'{columnNames[i]} (column {i})')
    plt.tight_layout(pad = 1.0, w_pad = 1.0, h_pad = 1.0)
    plt.show()


print("Distribution Graphs for 2009 dataset:\n")
plotPerColumnDistribution(df_9, 4, 3)
print("\nDistribution Graphs for 2010 dataset:\n")
plotPerColumnDistribution(df_10, 4, 3)
print("\nDistribution Graphs for 2011 dataset:\n")
plotPerColumnDistribution(df_11, 4, 3)
Distribution Graphs for 2009 dataset:

Distribution Graphs for 2010 dataset:

Distribution Graphs for 2011 dataset:

We could see that there is an equal distribution of male and female respondents.

Majority of repondents are having jobs or are retired.

The data is biased towards south and western states of USA geographically

Demographically, the distribution of respondents's age is normal and even across the years.

In [11]:
# Correlation matrix
def plotCorrelationMatrix(df, graphWidth):
    
    df = df.dropna('columns') # drop columns with NaN
    df = df[[col for col in df if df[col].nunique() > 1]] # keep columns where there are more than 1 unique values
    if df.shape[1] < 2:
        print(f'No correlation plots shown: The number of non-NaN or constant columns ({df.shape[1]}) is less than 2')
        return
    corr = df.corr()
    plt.figure(num=None, figsize=(graphWidth, graphWidth), dpi=80, facecolor='w', edgecolor='k')
    corrMat = plt.matshow(corr, fignum = 1)
    plt.xticks(range(len(corr.columns)), corr.columns, rotation=90)
    plt.yticks(range(len(corr.columns)), corr.columns)
    plt.gca().xaxis.tick_bottom()
    plt.colorbar(corrMat)
    plt.title(f'Correlation Matrix for {str(df)}', fontsize=15)
    plt.show()
    
plotCorrelationMatrix(df_9, 47)
plotCorrelationMatrix(df_10, 47)
plotCorrelationMatrix(df_11, 47)

We could see that across the year's , there is no direct correlation amongst the features of the dataset.

In [12]:
# Scatter and density plots
def plotScatterMatrix(df, plotSize, textSize):
    df = df.select_dtypes(include =[np.number]) # keep only numerical columns
    # Remove rows and columns that would lead to df being singular
    df = df.dropna('columns')
    df = df[[col for col in df if df[col].nunique() > 1]] # keep columns where there are more than 1 unique values
    columnNames = list(df)
    if len(columnNames) > 10: # reduce the number of columns for matrix inversion of kernel density plots
        columnNames = columnNames[:10]
    df = df[columnNames]
    ax = pd.plotting.scatter_matrix(df, alpha=0.75, figsize=[plotSize, plotSize], diagonal='kde')
    corrs = df.corr().values
    for i, j in zip(*plt.np.triu_indices_from(ax, k = 1)):
        ax[i, j].annotate('Corr. coef = %.3f' % corrs[i, j], (0.8, 0.2), xycoords='axes fraction', ha='center', va='center', size=textSize)
    plt.suptitle('Scatter and Density Plot')
    plt.show()

plotScatterMatrix(df_9, 20, 10)
    

Data Preperation for Question - 1

In [13]:
#reassigning column names for filtering purpose of data in 2009
df_10.columns=column_names_10
df_10

#part1 contains all columns having Q10 data
part1=df_10.filter(regex='^Q10',axis=1)
#part2 contains all columns having Q26 data
part2=df_10.filter(regex='^Q26',axis=1)
# part3 contains all columns having Q8 data
part3=df_10.filter(regex='^Q8',axis=1)

#dropping column having name Q89
part3.drop(part3.columns[len(part3.columns)-1], axis=1, inplace=True)

print('There are' ,part1.isna().any().sum(),' null values in part1 dataframe.')
print('There are' ,part2.isna().any().sum(),' null values in part2 dataframe.')
print('There are' ,part3.isna().any().sum(),' null values in part3 dataframe.')

#Replacing all NaN Values with No
part1.replace(to_replace ="-9999", value ="No",inplace=True)
part2.replace(to_replace ="-9999", value ="No",inplace=True)
part3.replace(to_replace ="-9999", value ="No",inplace=True)

#replacing all Yes with 1 and No with 0
part1.replace(to_replace=['Yes','No'],value=[1,0],inplace=True)
part2.replace(to_replace=['Yes','No'],value=[1,0],inplace=True)
part3.replace(to_replace=['Yes','No'],value=[1,0],inplace=True)

#part0 contains Age, Location, Ethnicity and Income Data
part00=df_10.iloc[:,:8]
part01=df_10.iloc[:,15]
task1_10=pd.concat([part00,part01,part1,part2,part3], axis=1)

#Adding a column to indicate that these records belong to 2010 dataset
task1_10['year'] = '2010'
task1_10
There are 0  null values in part1 dataframe.
There are 0  null values in part2 dataframe.
There are 0  null values in part3 dataframe.
Out[13]:
Q1r1 - To begin, what is your age? Q4 - What is your gender? age - you are... Q2 - In which state do you currently reside? region - Region QNEW3 - What is your employment status? Q5 - Which category best describes your ethnicity? QNEW1 - Do you have children living in your home (excluding yourself if you are under 18)? Q6 - Into which of the following categories does your total annual household income fall before taxes? Again, we promise to keep this, and all your answers, completely confidential. Q10 - Of those products you indicated you do not currently own, which of the following do you plan to purchase in the next 12 months?-Flat panel television ... Q8 - Which of the following media or home entertainment equipment does your household own?-Dedicated e-book reader Q8 - Which of the following media or home entertainment equipment does your household own?-Smartphone Q8 - Which of the following media or home entertainment equipment does your household own?-Basic mobile phone (not a smartphone) Q8 - Which of the following media or home entertainment equipment does your household own?-Smart watch Q8 - Which of the following media or home entertainment equipment does your household own?-Fitness band Q8 - Which of the following media or home entertainment equipment does your household own?-Virtual reality headset Q8 - Which of the following media or home entertainment equipment does your household own?-Drone Q8 - Which of the following media or home entertainment equipment does your household own?-None of the above Q8 - Which of the following media or home entertainment equipment does your household own?-Don't Know year
0 29 Female 24-29 New York Northeast Employed full-time or part-time White or Caucasian (Non-Hispanic) Yes $50,000 to $99,999 1 ... 0 1 0 0 0 0 0 0 0 2010
1 29 Male 24-29 California West Employed full-time or part-time White or Caucasian (Non-Hispanic) Yes $50,000 to $99,999 0 ... 0 1 0 0 0 0 0 0 0 2010
2 29 Female 24-29 Colorado West Employed full-time or part-time White or Caucasian (Non-Hispanic) No Less than $29,999 0 ... 0 1 0 0 0 0 0 0 0 2010
3 29 Female 24-29 South Carolina South Student White or Caucasian (Non-Hispanic) Yes $30,000 to $49,999 0 ... 0 1 0 0 1 0 0 0 0 2010
4 29 Male 24-29 Virginia South Employed full-time or part-time White or Caucasian (Non-Hispanic) Yes $50,000 to $99,999 0 ... 0 1 0 1 0 0 0 0 0 2010
5 29 Male 24-29 Pennsylvania Northeast Unemployed Multiracial No $50,000 to $99,999 1 ... 0 1 1 1 0 1 0 0 0 2010
6 24 Female 24-29 Michigan Midwest Employed full-time or part-time White or Caucasian (Non-Hispanic) No Less than $29,999 0 ... 0 1 0 0 1 0 0 0 0 2010
7 29 Female 24-29 New Jersey Northeast Employed full-time or part-time White or Caucasian (Non-Hispanic) No $50,000 to $99,999 0 ... 0 1 0 0 1 0 0 0 0 2010
9 29 Male 24-29 Florida South Student Multiracial No Less than $29,999 0 ... 0 1 1 0 0 0 0 0 0 2010
10 29 Female 24-29 New York Northeast Employed full-time or part-time African American Yes $100,000 to $299,999 0 ... 0 0 1 0 1 1 1 0 0 2010
11 29 Female 24-29 California West Unemployed East Asian or Southeast Asian Yes $50,000 to $99,999 0 ... 0 1 1 1 1 0 0 0 0 2010
12 29 Female 24-29 New Jersey Northeast Employed full-time or part-time Hispanic No $50,000 to $99,999 0 ... 1 1 1 1 1 1 1 0 0 2010
13 29 Male 24-29 Maine Northeast Employed full-time or part-time White or Caucasian (Non-Hispanic) Yes $50,000 to $99,999 0 ... 0 1 0 0 0 0 0 0 0 2010
14 29 Male 24-29 Washington West Unemployed East Asian or Southeast Asian No $50,000 to $99,999 0 ... 0 1 0 0 0 0 0 0 0 2010
15 29 Male 24-29 Massachusetts Northeast Employed full-time or part-time East Asian or Southeast Asian No $100,000 to $299,999 0 ... 0 1 0 0 1 0 0 0 0 2010
16 20 Male 14-23 Wisconsin Midwest Student East Asian or Southeast Asian No Less than $29,999 0 ... 0 1 0 1 1 0 0 0 0 2010
17 29 Female 24-29 Maine Northeast Employed full-time or part-time White or Caucasian (Non-Hispanic) Yes $50,000 to $99,999 0 ... 0 1 0 0 1 0 0 0 0 2010
18 26 Male 24-29 Florida South Self-employed White or Caucasian (Non-Hispanic) Yes $50,000 to $99,999 0 ... 0 1 0 1 1 0 1 0 0 2010
19 29 Female 24-29 Tennessee South Employed full-time or part-time White or Caucasian (Non-Hispanic) No $50,000 to $99,999 0 ... 1 1 0 0 0 0 0 0 0 2010
20 29 Male 24-29 Tennessee South Student White or Caucasian (Non-Hispanic) Yes Less than $29,999 0 ... 0 0 1 0 0 0 0 0 0 2010
22 29 Female 24-29 Wisconsin Midwest Unemployed White or Caucasian (Non-Hispanic) Yes $30,000 to $49,999 0 ... 1 1 0 1 1 1 0 0 0 2010
23 29 Male 24-29 North Carolina South Employed full-time or part-time White or Caucasian (Non-Hispanic) No $30,000 to $49,999 0 ... 0 1 0 0 0 0 0 0 0 2010
24 17 Male 14-23 California West Student Hispanic Yes $50,000 to $99,999 0 ... 0 1 0 0 0 0 0 0 0 2010
25 29 Male 24-29 Missouri Midwest Employed full-time or part-time White or Caucasian (Non-Hispanic) No $50,000 to $99,999 0 ... 0 1 0 1 0 0 0 0 0 2010
26 29 Male 24-29 Ohio Midwest Employed full-time or part-time White or Caucasian (Non-Hispanic) No Less than $29,999 0 ... 0 1 0 0 0 0 0 0 0 2010
27 29 Female 24-29 New Jersey Northeast Employed full-time or part-time White or Caucasian (Non-Hispanic) No $50,000 to $99,999 0 ... 1 1 0 0 0 0 0 0 0 2010
28 29 Female 24-29 Texas South Unemployed White or Caucasian (Non-Hispanic) No $30,000 to $49,999 0 ... 0 1 0 0 0 0 0 0 0 2010
29 24 Female 24-29 Arizona West Student White or Caucasian (Non-Hispanic) No $30,000 to $49,999 0 ... 1 1 0 0 0 0 0 0 0 2010
30 29 Female 24-29 California West Unemployed Hispanic Yes Less than $29,999 0 ... 0 1 0 0 1 0 0 0 0 2010
31 29 Female 24-29 California West Employed full-time or part-time East Asian or Southeast Asian No $50,000 to $99,999 0 ... 0 1 0 0 0 0 0 0 0 2010
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2169 70 Male 66 or older Arizona West Employed full-time or part-time Multiracial No $100,000 to $299,999 0 ... 0 1 1 0 0 0 0 0 0 2010
2171 70 Male 66 or older New York Northeast Employed full-time or part-time White or Caucasian (Non-Hispanic) No $50,000 to $99,999 0 ... 1 0 0 0 0 0 0 0 0 2010
2172 70 Male 66 or older New Hampshire Northeast Retired White or Caucasian (Non-Hispanic) No $50,000 to $99,999 0 ... 1 0 1 0 0 0 0 0 0 2010
2173 70 Male 66 or older New Mexico West Retired White or Caucasian (Non-Hispanic) No $30,000 to $49,999 0 ... 0 0 0 0 0 0 0 0 0 2010
2174 70 Male 66 or older Illinois Midwest Retired White or Caucasian (Non-Hispanic) No $50,000 to $99,999 1 ... 0 1 0 0 1 0 0 0 0 2010
2175 70 Female 66 or older Texas South Unemployed Multiracial No $30,000 to $49,999 0 ... 0 1 0 0 0 0 0 0 0 2010
2176 70 Female 66 or older Texas South Retired White or Caucasian (Non-Hispanic) No $30,000 to $49,999 0 ... 0 0 1 0 0 0 0 0 0 2010
2177 70 Female 66 or older Ohio Midwest Retired Hispanic No Less than $29,999 1 ... 0 0 1 0 0 0 0 0 0 2010
2178 70 Male 66 or older New York Northeast Retired White or Caucasian (Non-Hispanic) No $50,000 to $99,999 0 ... 1 1 1 0 0 0 0 0 0 2010
2179 70 Female 66 or older South Dakota Midwest Retired White or Caucasian (Non-Hispanic) No $50,000 to $99,999 1 ... 0 0 1 0 0 0 0 0 0 2010
2180 70 Male 66 or older Florida South Employed full-time or part-time White or Caucasian (Non-Hispanic) No $100,000 to $299,999 0 ... 0 1 0 0 0 0 0 0 0 2010
2182 70 Male 66 or older South Carolina South Retired White or Caucasian (Non-Hispanic) No $50,000 to $99,999 0 ... 0 0 1 0 0 0 0 0 0 2010
2183 70 Male 66 or older Texas South Retired White or Caucasian (Non-Hispanic) No $50,000 to $99,999 0 ... 0 0 1 0 0 0 0 0 0 2010
2184 70 Male 66 or older District of Columbia South Retired African American No $100,000 to $299,999 0 ... 0 0 0 0 0 0 0 0 0 2010
2186 70 Male 66 or older Texas South Retired Hispanic No $100,000 to $299,999 1 ... 0 1 0 0 0 0 0 0 0 2010
2187 70 Male 66 or older Maryland South Self-employed African American No $100,000 to $299,999 0 ... 0 1 1 0 0 0 0 0 0 2010
2188 70 Male 66 or older Kentucky South Retired White or Caucasian (Non-Hispanic) No $50,000 to $99,999 0 ... 1 1 0 0 1 0 0 0 0 2010
2189 70 Female 66 or older Illinois Midwest Employed full-time or part-time White or Caucasian (Non-Hispanic) No $50,000 to $99,999 0 ... 0 1 0 0 0 0 0 0 0 2010
2190 70 Male 66 or older Ohio Midwest Employed full-time or part-time White or Caucasian (Non-Hispanic) No $30,000 to $49,999 0 ... 0 0 1 0 0 0 0 0 0 2010
2191 70 Female 66 or older Tennessee South Retired White or Caucasian (Non-Hispanic) No $30,000 to $49,999 0 ... 0 0 1 0 0 0 0 0 0 2010
2192 70 Female 66 or older Wisconsin Midwest Retired White or Caucasian (Non-Hispanic) No $30,000 to $49,999 0 ... 0 0 1 0 1 0 0 0 0 2010
2194 25 Male 24-29 Illinois Midwest Employed full-time or part-time White or Caucasian (Non-Hispanic) Yes $50,000 to $99,999 0 ... 0 1 0 0 0 0 0 0 0 2010
2196 24 Male 24-29 Minnesota Midwest Employed full-time or part-time White or Caucasian (Non-Hispanic) No $30,000 to $49,999 0 ... 0 1 0 0 0 0 0 0 0 2010
2197 25 Male 24-29 Virginia South Self-employed White or Caucasian (Non-Hispanic) No Less than $29,999 0 ... 0 1 1 0 0 0 0 0 0 2010
2198 25 Male 24-29 Florida South Employed full-time or part-time White or Caucasian (Non-Hispanic) Yes $50,000 to $99,999 0 ... 0 1 0 0 0 0 0 0 0 2010
2199 71 Male 66 or older Pennsylvania Northeast Retired White or Caucasian (Non-Hispanic) No Less than $29,999 0 ... 0 0 1 0 0 0 0 0 0 2010
2200 74 Male 66 or older California West Unemployed White or Caucasian (Non-Hispanic) Yes $30,000 to $49,999 0 ... 0 1 0 0 0 0 0 0 0 2010
2202 78 Female 66 or older Connecticut Northeast Employed full-time or part-time White or Caucasian (Non-Hispanic) No Less than $29,999 0 ... 0 1 0 0 0 0 0 0 0 2010
2203 79 Male 66 or older Maryland South Retired White or Caucasian (Non-Hispanic) No $30,000 to $49,999 0 ... 0 0 1 0 0 0 0 0 0 2010
2204 87 Female 66 or older California West Retired White or Caucasian (Non-Hispanic) No Less than $29,999 0 ... 0 0 1 0 0 0 0 0 0 2010

2035 rows × 64 columns

In [14]:
#Renaming of columns that have slight difference in column names between datafram for 2010 and 2011


old_name_1 = "Q10 - Of those products you indicated you do not currently own, which of the following do you plan to purchase in the next 12 months?-Basic mobile phone"
new_name_1 = "Q10 - Of those products you indicated you do not currently own, which of the following do you plan to purchase in the next 12 months?-Basic mobile phone (not a smartphone)"


old_name_2 = "Q26 - Which of the following subscriptions does your household purchase?-Pay TV (cable and/or satellite)"
new_name_2 = "Q26 - Which of the following subscriptions does your household purchase?-Pay TV (traditional cable and/or satellite bundle)"


#Renaming the columns
task1_10.rename(columns = {old_name_1: new_name_1,
                         old_name_2: new_name_2 }, 
                                 inplace = True)
   
task1_10    
Out[14]:
Q1r1 - To begin, what is your age? Q4 - What is your gender? age - you are... Q2 - In which state do you currently reside? region - Region QNEW3 - What is your employment status? Q5 - Which category best describes your ethnicity? QNEW1 - Do you have children living in your home (excluding yourself if you are under 18)? Q6 - Into which of the following categories does your total annual household income fall before taxes? Again, we promise to keep this, and all your answers, completely confidential. Q10 - Of those products you indicated you do not currently own, which of the following do you plan to purchase in the next 12 months?-Flat panel television ... Q8 - Which of the following media or home entertainment equipment does your household own?-Dedicated e-book reader Q8 - Which of the following media or home entertainment equipment does your household own?-Smartphone Q8 - Which of the following media or home entertainment equipment does your household own?-Basic mobile phone (not a smartphone) Q8 - Which of the following media or home entertainment equipment does your household own?-Smart watch Q8 - Which of the following media or home entertainment equipment does your household own?-Fitness band Q8 - Which of the following media or home entertainment equipment does your household own?-Virtual reality headset Q8 - Which of the following media or home entertainment equipment does your household own?-Drone Q8 - Which of the following media or home entertainment equipment does your household own?-None of the above Q8 - Which of the following media or home entertainment equipment does your household own?-Don't Know year
0 29 Female 24-29 New York Northeast Employed full-time or part-time White or Caucasian (Non-Hispanic) Yes $50,000 to $99,999 1 ... 0 1 0 0 0 0 0 0 0 2010
1 29 Male 24-29 California West Employed full-time or part-time White or Caucasian (Non-Hispanic) Yes $50,000 to $99,999 0 ... 0 1 0 0 0 0 0 0 0 2010
2 29 Female 24-29 Colorado West Employed full-time or part-time White or Caucasian (Non-Hispanic) No Less than $29,999 0 ... 0 1 0 0 0 0 0 0 0 2010
3 29 Female 24-29 South Carolina South Student White or Caucasian (Non-Hispanic) Yes $30,000 to $49,999 0 ... 0 1 0 0 1 0 0 0 0 2010
4 29 Male 24-29 Virginia South Employed full-time or part-time White or Caucasian (Non-Hispanic) Yes $50,000 to $99,999 0 ... 0 1 0 1 0 0 0 0 0 2010
5 29 Male 24-29 Pennsylvania Northeast Unemployed Multiracial No $50,000 to $99,999 1 ... 0 1 1 1 0 1 0 0 0 2010
6 24 Female 24-29 Michigan Midwest Employed full-time or part-time White or Caucasian (Non-Hispanic) No Less than $29,999 0 ... 0 1 0 0 1 0 0 0 0 2010
7 29 Female 24-29 New Jersey Northeast Employed full-time or part-time White or Caucasian (Non-Hispanic) No $50,000 to $99,999 0 ... 0 1 0 0 1 0 0 0 0 2010
9 29 Male 24-29 Florida South Student Multiracial No Less than $29,999 0 ... 0 1 1 0 0 0 0 0 0 2010
10 29 Female 24-29 New York Northeast Employed full-time or part-time African American Yes $100,000 to $299,999 0 ... 0 0 1 0 1 1 1 0 0 2010
11 29 Female 24-29 California West Unemployed East Asian or Southeast Asian Yes $50,000 to $99,999 0 ... 0 1 1 1 1 0 0 0 0 2010
12 29 Female 24-29 New Jersey Northeast Employed full-time or part-time Hispanic No $50,000 to $99,999 0 ... 1 1 1 1 1 1 1 0 0 2010
13 29 Male 24-29 Maine Northeast Employed full-time or part-time White or Caucasian (Non-Hispanic) Yes $50,000 to $99,999 0 ... 0 1 0 0 0 0 0 0 0 2010
14 29 Male 24-29 Washington West Unemployed East Asian or Southeast Asian No $50,000 to $99,999 0 ... 0 1 0 0 0 0 0 0 0 2010
15 29 Male 24-29 Massachusetts Northeast Employed full-time or part-time East Asian or Southeast Asian No $100,000 to $299,999 0 ... 0 1 0 0 1 0 0 0 0 2010
16 20 Male 14-23 Wisconsin Midwest Student East Asian or Southeast Asian No Less than $29,999 0 ... 0 1 0 1 1 0 0 0 0 2010
17 29 Female 24-29 Maine Northeast Employed full-time or part-time White or Caucasian (Non-Hispanic) Yes $50,000 to $99,999 0 ... 0 1 0 0 1 0 0 0 0 2010
18 26 Male 24-29 Florida South Self-employed White or Caucasian (Non-Hispanic) Yes $50,000 to $99,999 0 ... 0 1 0 1 1 0 1 0 0 2010
19 29 Female 24-29 Tennessee South Employed full-time or part-time White or Caucasian (Non-Hispanic) No $50,000 to $99,999 0 ... 1 1 0 0 0 0 0 0 0 2010
20 29 Male 24-29 Tennessee South Student White or Caucasian (Non-Hispanic) Yes Less than $29,999 0 ... 0 0 1 0 0 0 0 0 0 2010
22 29 Female 24-29 Wisconsin Midwest Unemployed White or Caucasian (Non-Hispanic) Yes $30,000 to $49,999 0 ... 1 1 0 1 1 1 0 0 0 2010
23 29 Male 24-29 North Carolina South Employed full-time or part-time White or Caucasian (Non-Hispanic) No $30,000 to $49,999 0 ... 0 1 0 0 0 0 0 0 0 2010
24 17 Male 14-23 California West Student Hispanic Yes $50,000 to $99,999 0 ... 0 1 0 0 0 0 0 0 0 2010
25 29 Male 24-29 Missouri Midwest Employed full-time or part-time White or Caucasian (Non-Hispanic) No $50,000 to $99,999 0 ... 0 1 0 1 0 0 0 0 0 2010
26 29 Male 24-29 Ohio Midwest Employed full-time or part-time White or Caucasian (Non-Hispanic) No Less than $29,999 0 ... 0 1 0 0 0 0 0 0 0 2010
27 29 Female 24-29 New Jersey Northeast Employed full-time or part-time White or Caucasian (Non-Hispanic) No $50,000 to $99,999 0 ... 1 1 0 0 0 0 0 0 0 2010
28 29 Female 24-29 Texas South Unemployed White or Caucasian (Non-Hispanic) No $30,000 to $49,999 0 ... 0 1 0 0 0 0 0 0 0 2010
29 24 Female 24-29 Arizona West Student White or Caucasian (Non-Hispanic) No $30,000 to $49,999 0 ... 1 1 0 0 0 0 0 0 0 2010
30 29 Female 24-29 California West Unemployed Hispanic Yes Less than $29,999 0 ... 0 1 0 0 1 0 0 0 0 2010
31 29 Female 24-29 California West Employed full-time or part-time East Asian or Southeast Asian No $50,000 to $99,999 0 ... 0 1 0 0 0 0 0 0 0 2010
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2169 70 Male 66 or older Arizona West Employed full-time or part-time Multiracial No $100,000 to $299,999 0 ... 0 1 1 0 0 0 0 0 0 2010
2171 70 Male 66 or older New York Northeast Employed full-time or part-time White or Caucasian (Non-Hispanic) No $50,000 to $99,999 0 ... 1 0 0 0 0 0 0 0 0 2010
2172 70 Male 66 or older New Hampshire Northeast Retired White or Caucasian (Non-Hispanic) No $50,000 to $99,999 0 ... 1 0 1 0 0 0 0 0 0 2010
2173 70 Male 66 or older New Mexico West Retired White or Caucasian (Non-Hispanic) No $30,000 to $49,999 0 ... 0 0 0 0 0 0 0 0 0 2010
2174 70 Male 66 or older Illinois Midwest Retired White or Caucasian (Non-Hispanic) No $50,000 to $99,999 1 ... 0 1 0 0 1 0 0 0 0 2010
2175 70 Female 66 or older Texas South Unemployed Multiracial No $30,000 to $49,999 0 ... 0 1 0 0 0 0 0 0 0 2010
2176 70 Female 66 or older Texas South Retired White or Caucasian (Non-Hispanic) No $30,000 to $49,999 0 ... 0 0 1 0 0 0 0 0 0 2010
2177 70 Female 66 or older Ohio Midwest Retired Hispanic No Less than $29,999 1 ... 0 0 1 0 0 0 0 0 0 2010
2178 70 Male 66 or older New York Northeast Retired White or Caucasian (Non-Hispanic) No $50,000 to $99,999 0 ... 1 1 1 0 0 0 0 0 0 2010
2179 70 Female 66 or older South Dakota Midwest Retired White or Caucasian (Non-Hispanic) No $50,000 to $99,999 1 ... 0 0 1 0 0 0 0 0 0 2010
2180 70 Male 66 or older Florida South Employed full-time or part-time White or Caucasian (Non-Hispanic) No $100,000 to $299,999 0 ... 0 1 0 0 0 0 0 0 0 2010
2182 70 Male 66 or older South Carolina South Retired White or Caucasian (Non-Hispanic) No $50,000 to $99,999 0 ... 0 0 1 0 0 0 0 0 0 2010
2183 70 Male 66 or older Texas South Retired White or Caucasian (Non-Hispanic) No $50,000 to $99,999 0 ... 0 0 1 0 0 0 0 0 0 2010
2184 70 Male 66 or older District of Columbia South Retired African American No $100,000 to $299,999 0 ... 0 0 0 0 0 0 0 0 0 2010
2186 70 Male 66 or older Texas South Retired Hispanic No $100,000 to $299,999 1 ... 0 1 0 0 0 0 0 0 0 2010
2187 70 Male 66 or older Maryland South Self-employed African American No $100,000 to $299,999 0 ... 0 1 1 0 0 0 0 0 0 2010
2188 70 Male 66 or older Kentucky South Retired White or Caucasian (Non-Hispanic) No $50,000 to $99,999 0 ... 1 1 0 0 1 0 0 0 0 2010
2189 70 Female 66 or older Illinois Midwest Employed full-time or part-time White or Caucasian (Non-Hispanic) No $50,000 to $99,999 0 ... 0 1 0 0 0 0 0 0 0 2010
2190 70 Male 66 or older Ohio Midwest Employed full-time or part-time White or Caucasian (Non-Hispanic) No $30,000 to $49,999 0 ... 0 0 1 0 0 0 0 0 0 2010
2191 70 Female 66 or older Tennessee South Retired White or Caucasian (Non-Hispanic) No $30,000 to $49,999 0 ... 0 0 1 0 0 0 0 0 0 2010
2192 70 Female 66 or older Wisconsin Midwest Retired White or Caucasian (Non-Hispanic) No $30,000 to $49,999 0 ... 0 0 1 0 1 0 0 0 0 2010
2194 25 Male 24-29 Illinois Midwest Employed full-time or part-time White or Caucasian (Non-Hispanic) Yes $50,000 to $99,999 0 ... 0 1 0 0 0 0 0 0 0 2010
2196 24 Male 24-29 Minnesota Midwest Employed full-time or part-time White or Caucasian (Non-Hispanic) No $30,000 to $49,999 0 ... 0 1 0 0 0 0 0 0 0 2010
2197 25 Male 24-29 Virginia South Self-employed White or Caucasian (Non-Hispanic) No Less than $29,999 0 ... 0 1 1 0 0 0 0 0 0 2010
2198 25 Male 24-29 Florida South Employed full-time or part-time White or Caucasian (Non-Hispanic) Yes $50,000 to $99,999 0 ... 0 1 0 0 0 0 0 0 0 2010
2199 71 Male 66 or older Pennsylvania Northeast Retired White or Caucasian (Non-Hispanic) No Less than $29,999 0 ... 0 0 1 0 0 0 0 0 0 2010
2200 74 Male 66 or older California West Unemployed White or Caucasian (Non-Hispanic) Yes $30,000 to $49,999 0 ... 0 1 0 0 0 0 0 0 0 2010
2202 78 Female 66 or older Connecticut Northeast Employed full-time or part-time White or Caucasian (Non-Hispanic) No Less than $29,999 0 ... 0 1 0 0 0 0 0 0 0 2010
2203 79 Male 66 or older Maryland South Retired White or Caucasian (Non-Hispanic) No $30,000 to $49,999 0 ... 0 0 1 0 0 0 0 0 0 2010
2204 87 Female 66 or older California West Retired White or Caucasian (Non-Hispanic) No Less than $29,999 0 ... 0 0 1 0 0 0 0 0 0 2010

2035 rows × 64 columns

In [15]:
#reassigning column names for filtering purpose of data in 2009
df_11.columns=column_names_11
df_11

#part1 contains all columns having Q10 data
part1=df_11.filter(regex='^Q10',axis=1)
#part2 contains all columns having Q26 data
part2=df_11.filter(regex='^Q26',axis=1)
# part3 contains all columns having Q8 data
part3=df_11.filter(regex='^Q8',axis=1)

#dropping column having name Q89
part3.drop(part3.columns[len(part3.columns)-1], axis=1, inplace=True)

print('There are' ,part1.isna().any().sum(),' null values in part1 dataframe.')
print('There are' ,part2.isna().any().sum(),' null values in part2 dataframe.')
print('There are' ,part3.isna().any().sum(),' null values in part3 dataframe.')

#Replacing all NaN Values with No
part1.replace(to_replace ="-9999", value ="No",inplace=True)
part2.replace(to_replace ="-9999", value ="No",inplace=True)
part3.replace(to_replace ="-9999", value ="No",inplace=True)

#replacing all Yes with 1 and No with 0
part1.replace(to_replace=['Yes','No'],value=[1,0],inplace=True)
part2.replace(to_replace=['Yes','No'],value=[1,0],inplace=True)
part3.replace(to_replace=['Yes','No'],value=[1,0],inplace=True)

#part0 contains Age, Location, Ethnicity and Income Data
part00=df_11.iloc[:,:8]
part01=df_11.iloc[:,15]
task1_11=pd.concat([part00,part01,part3,part1,part2], axis=1)

#Adding a column to indicate that these records belong to 2011 dataset
task1_11['year'] = '2011'
task1_11
There are 0  null values in part1 dataframe.
There are 0  null values in part2 dataframe.
There are 0  null values in part3 dataframe.
Out[15]:
Q1r1 - To begin, what is your age? Q4 - What is your gender? age - you are... Q2 - In which state do you currently reside? region - Region QNEW3 - What is your employment status? Q5 - Which category best describes your ethnicity? QNEW1 - Do you have children living in your home (excluding yourself if you are under 18)? Q6 - Into which of the following categories does your total annual household income fall before taxes? Again, we promise to keep this, and all your answers, completely confidential. Q8 - Which of the following media or home entertainment equipment does your household own?-Flat panel television ... Q26 - Which of the following subscriptions does your household purchase?-Mobile voice (smartphone or basic mobile phone calling plan) Q26 - Which of the following subscriptions does your household purchase?-Mobile data plan Q26 - Which of the following subscriptions does your household purchase?-Streaming video service Q26 - Which of the following subscriptions does your household purchase?-Streaming music service Q26 - Which of the following subscriptions does your household purchase?-Gaming Q26 - Which of the following subscriptions does your household purchase?-News/Newspaper (print or digital) Q26 - Which of the following subscriptions does your household purchase?-Magazine (print or digital) Q26 - Which of the following subscriptions does your household purchase?-None of the above Q26 - Which of the following subscriptions does your household purchase?-Don't Know year
0 36 Male 34-50 Georgia South Employed full-time or part-time White or Caucasian (Non-Hispanic) Yes $50,000 to $99,999 0 ... 0 0 0 0 0 1 1 0 0 2011
1 26 Female 20-26 New York Northeast Employed full-time or part-time White or Caucasian (Non-Hispanic) Yes $50,000 to $99,999 0 ... 0 1 1 0 0 0 0 0 0 2011
2 32 Female 27-33 New Jersey Northeast Employed full-time or part-time White or Caucasian (Non-Hispanic) Yes Less than $29,999 0 ... 0 1 0 0 0 0 1 0 0 2011
3 25 Female 20-26 California West Employed full-time or part-time White or Caucasian (Non-Hispanic) Yes Less than $29,999 1 ... 0 1 1 0 1 0 0 0 0 2011
4 28 Male 27-33 Indiana Midwest Student White or Caucasian (Non-Hispanic) Yes $50,000 to $99,999 1 ... 0 0 1 0 0 0 0 0 0 2011
5 33 Male 27-33 New York Northeast Self-employed White or Caucasian (Non-Hispanic) Yes $50,000 to $99,999 1 ... 0 1 0 0 1 0 1 0 0 2011
6 35 Male 34-50 California West Employed full-time or part-time White or Caucasian (Non-Hispanic) Yes $100,000 to $299,999 0 ... 0 0 1 1 0 0 0 0 0 2011
7 36 Male 34-50 Illinois Midwest Employed full-time or part-time White or Caucasian (Non-Hispanic) Yes $100,000 to $299,999 0 ... 0 0 0 0 1 0 1 0 0 2011
8 74 Male 70 or older New Jersey Northeast Retired White or Caucasian (Non-Hispanic) No Less than $29,999 1 ... 0 0 0 0 0 1 1 0 0 2011
9 16 Female 14-19 Connecticut Northeast Student White or Caucasian (Non-Hispanic) Yes $100,000 to $299,999 1 ... 1 1 0 1 0 0 0 0 0 2011
10 32 Female 27-33 New York Northeast Employed full-time or part-time White or Caucasian (Non-Hispanic) Yes $50,000 to $99,999 0 ... 1 1 1 0 1 0 1 0 0 2011
11 38 Male 34-50 Ohio Midwest Employed full-time or part-time White or Caucasian (Non-Hispanic) Yes $100,000 to $299,999 0 ... 0 0 1 1 1 0 1 0 0 2011
12 31 Male 27-33 Rhode Island Northeast Employed full-time or part-time White or Caucasian (Non-Hispanic) Yes $50,000 to $99,999 1 ... 1 0 1 0 0 0 0 0 0 2011
13 17 Female 14-19 Oregon West Unemployed White or Caucasian (Non-Hispanic) No $30,000 to $49,999 1 ... 1 1 1 0 1 0 0 0 0 2011
14 17 Male 14-19 Florida South Student White or Caucasian (Non-Hispanic) Yes Less than $29,999 0 ... 1 1 1 0 1 0 0 0 0 2011
16 22 Female 20-26 Wisconsin Midwest Employed full-time or part-time White or Caucasian (Non-Hispanic) Yes $30,000 to $49,999 1 ... 0 1 1 1 1 0 0 0 0 2011
17 70 Female 70 or older Kentucky South Retired White or Caucasian (Non-Hispanic) No Less than $29,999 1 ... 0 0 0 0 0 0 0 0 0 2011
18 67 Male 51-69 New York Northeast Retired White or Caucasian (Non-Hispanic) No Less than $29,999 0 ... 0 0 0 0 0 1 1 0 0 2011
19 43 Female 34-50 New York Northeast Unemployed African American Yes Less than $29,999 1 ... 1 0 1 0 1 0 0 0 0 2011
20 45 Female 34-50 Indiana Midwest Employed full-time or part-time White or Caucasian (Non-Hispanic) No Less than $29,999 1 ... 1 1 1 0 0 0 0 0 0 2011
21 21 Female 20-26 Texas South Employed full-time or part-time African American Yes Less than $29,999 1 ... 1 1 1 1 1 1 1 0 0 2011
24 52 Male 51-69 Michigan Midwest Employed full-time or part-time White or Caucasian (Non-Hispanic) No $30,000 to $49,999 0 ... 0 0 0 0 0 0 0 1 0 2011
25 32 Female 27-33 Illinois Midwest Employed full-time or part-time South Asian (India, Pakistan, Sri Lanka) Yes $50,000 to $99,999 1 ... 1 1 1 0 0 0 0 0 0 2011
26 25 Female 20-26 New York Northeast Employed full-time or part-time White or Caucasian (Non-Hispanic) No $100,000 to $299,999 0 ... 1 1 1 0 1 1 0 0 0 2011
27 24 Male 20-26 New York Northeast Employed full-time or part-time White or Caucasian (Non-Hispanic) No $50,000 to $99,999 0 ... 0 0 0 0 1 0 0 0 0 2011
28 44 Female 34-50 Connecticut Northeast Unemployed White or Caucasian (Non-Hispanic) Yes $50,000 to $99,999 1 ... 1 1 0 0 1 0 1 0 0 2011
29 47 Female 34-50 Iowa Midwest Unemployed White or Caucasian (Non-Hispanic) Yes $30,000 to $49,999 1 ... 1 1 1 0 0 0 0 0 0 2011
30 66 Female 51-69 New Jersey Northeast Employed full-time or part-time White or Caucasian (Non-Hispanic) No Less than $29,999 1 ... 0 1 1 0 0 1 1 0 0 2011
31 49 Female 34-50 Illinois Midwest Unemployed White or Caucasian (Non-Hispanic) No Less than $29,999 0 ... 0 0 0 0 0 0 0 0 0 2011
32 17 Male 14-19 Virginia South Student White or Caucasian (Non-Hispanic) No $50,000 to $99,999 1 ... 1 1 0 0 1 1 1 0 0 2011
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2096 71 Male 70 or older South Carolina South Retired Multiracial No $30,000 to $49,999 1 ... 0 0 0 0 0 0 0 0 0 2011
2097 73 Male 70 or older Missouri Midwest Retired White or Caucasian (Non-Hispanic) No $50,000 to $99,999 1 ... 1 1 0 0 0 0 1 0 0 2011
2099 73 Male 70 or older Florida South Self-employed White or Caucasian (Non-Hispanic) No Less than $29,999 1 ... 1 1 1 1 1 0 0 0 0 2011
2100 18 Male 14-19 Louisiana South Student White or Caucasian (Non-Hispanic) No $50,000 to $99,999 0 ... 1 1 1 1 1 1 0 0 0 2011
2101 18 Female 14-19 South Dakota Midwest Unemployed White or Caucasian (Non-Hispanic) No $50,000 to $99,999 0 ... 0 1 0 0 0 0 1 0 0 2011
2102 15 Male 14-19 New York Northeast Self-employed White or Caucasian (Non-Hispanic) Yes $100,000 to $299,999 1 ... 1 1 1 1 1 1 1 0 0 2011
2103 15 Female 14-19 Florida South Employed full-time or part-time White or Caucasian (Non-Hispanic) Yes $100,000 to $299,999 1 ... 1 1 1 0 0 1 1 0 0 2011
2105 71 Female 70 or older Idaho West Retired White or Caucasian (Non-Hispanic) No Less than $29,999 1 ... 0 0 0 0 0 0 0 0 0 2011
2106 76 Female 70 or older Wisconsin Midwest Retired White or Caucasian (Non-Hispanic) No $50,000 to $99,999 1 ... 0 1 0 0 0 1 1 0 0 2011
2107 18 Female 14-19 California West Student Hispanic No $30,000 to $49,999 0 ... 0 0 0 0 0 0 0 1 0 2011
2109 73 Female 70 or older Pennsylvania Northeast Retired White or Caucasian (Non-Hispanic) No $30,000 to $49,999 1 ... 0 0 1 0 0 0 0 0 0 2011
2110 71 Male 70 or older Virginia South Retired White or Caucasian (Non-Hispanic) No Less than $29,999 0 ... 0 0 0 0 0 0 0 0 0 2011
2111 77 Female 70 or older Michigan Midwest Retired Multiracial No $30,000 to $49,999 0 ... 0 0 0 0 0 0 0 0 0 2011
2112 18 Male 14-19 Texas South Student White or Caucasian (Non-Hispanic) Yes $30,000 to $49,999 1 ... 1 0 1 0 0 0 0 0 0 2011
2113 71 Male 70 or older Alabama South Retired White or Caucasian (Non-Hispanic) No $50,000 to $99,999 1 ... 1 1 1 1 0 0 1 0 0 2011
2115 14 Male 14-19 Kansas Midwest Self-employed White or Caucasian (Non-Hispanic) Yes $50,000 to $99,999 0 ... 0 0 1 1 1 0 1 0 0 2011
2116 70 Female 70 or older Florida South Retired White or Caucasian (Non-Hispanic) No Less than $29,999 0 ... 0 0 0 0 0 0 0 0 0 2011
2117 19 Female 14-19 Maryland South Student White or Caucasian (Non-Hispanic) No $100,000 to $299,999 1 ... 0 1 1 1 0 1 1 0 0 2011
2118 15 Male 14-19 Florida South Student White or Caucasian (Non-Hispanic) No $50,000 to $99,999 1 ... 1 1 0 1 0 1 0 0 0 2011
2119 14 Male 14-19 Illinois Midwest Student White or Caucasian (Non-Hispanic) Yes $100,000 to $299,999 0 ... 1 1 0 0 0 0 0 0 0 2011
2120 70 Female 70 or older Washington West Retired White or Caucasian (Non-Hispanic) No $50,000 to $99,999 1 ... 0 0 0 0 0 1 1 0 0 2011
2121 19 Female 14-19 Virginia South Unemployed White or Caucasian (Non-Hispanic) Yes Less than $29,999 0 ... 0 1 1 1 0 0 0 0 0 2011
2122 18 Male 14-19 Florida South Student White or Caucasian (Non-Hispanic) No $100,000 to $299,999 1 ... 1 1 1 1 1 1 1 0 0 2011
2124 15 Female 14-19 Oregon West Student White or Caucasian (Non-Hispanic) Yes $100,000 to $299,999 0 ... 0 1 1 0 1 0 0 0 0 2011
2125 14 Male 14-19 Ohio Midwest Student Multiracial Yes Less than $29,999 1 ... 0 0 1 0 0 0 0 0 0 2011
2126 14 Male 14-19 Oregon West Unemployed White or Caucasian (Non-Hispanic) Yes $100,000 to $299,999 1 ... 0 0 0 0 0 0 1 0 0 2011
2127 70 Male 70 or older Massachusetts Northeast Retired White or Caucasian (Non-Hispanic) No $50,000 to $99,999 1 ... 0 0 0 0 0 0 0 0 0 2011
2128 18 Male 14-19 Alabama South Unemployed White or Caucasian (Non-Hispanic) No $50,000 to $99,999 1 ... 0 0 0 0 1 0 0 0 0 2011
2129 79 Female 70 or older Illinois Midwest Retired White or Caucasian (Non-Hispanic) No Less than $29,999 1 ... 0 0 0 0 0 1 1 0 0 2011
2130 77 Female 70 or older Pennsylvania Northeast Retired Multiracial No $30,000 to $49,999 0 ... 1 0 0 0 0 0 0 0 0 2011

2011 rows × 64 columns

Business Case 1 : Bundling of products and subscriptions based on currently owned products and subscriptions and products to be owned in the future

In [16]:
#concatenating the 2 dataframes 
merged_df = pd.concat([task1_10 , task1_11], axis=0).reset_index()
merged_df

#add default quantity column and assume the value of 1
merged_df['quantity'] = 1
merged_df

#Changing column names for the consumer feaatures
o1 = "Q1r1 - To begin, what is your age?"
n1 = "Age"

o2 = "Q2 - In which state do you currently reside?"
n2 ="State"

o3 = "Q4 - What is your gender?"
n3 = "Gender"

o4 = "Q5 - Which category best describes your ethnicity?"
n4 = "Ethnicity"

o5 = "Q6 - Into which of the following categories does your total annual household income fall before taxes? Again, we promise to keep this, and all your answers, completely confidential."
n5 = "Household Income"

o6 = "QNEW1 - Do you have children living in your home (excluding yourself if you are under 18)?"
n6 = "Children in Household?"

o7 = "QNEW3 - What is your employment status?"
n7 = "Employment status"

o8 = "age - you are..."
n8 = "Age range"

o9 = "region - Region"
n9 = "Region"


#Renaming the columns
merged_df.rename(columns = {o1 :n1 , o2:n2 ,  o3:n3 ,  o4:n4 ,  o5:n5 ,  o6:n6 ,  o7:n7 ,  o8:n8 ,  o9:n9 }, 
                                 inplace = True)

Q8 - Products

In [17]:
# Creating a dataframe consisting of only consumer features and Q8 columns
df_temp = merged_df

Q8_cols = [col for col in merged_df.columns if 'Q8' in col]

#dataframe with columns for Question 8 
df_temp_1=df_temp.loc[:, df_temp.columns.isin(Q8_cols)]

temp = [col for col in merged_df.columns if 'Q8' not in col]
temp = [col for col in temp if 'Q10' not in col]
temp = [col for col in temp if 'Q26' not in col]

#dataframe with columns of consumer feature
df_temp_2=df_temp.loc[:, df_temp.columns.isin(temp)]
df_temp_2['quantity'] = 1


#combining the two dataframes
df_Q_8=pd.concat([df_temp_2 , df_temp_1 ], axis=1)
df_Q_8
Out[17]:
index Age State Gender Ethnicity Household Income Children in Household? Employment status Age range Region ... Q8 - Which of the following media or home entertainment equipment does your household own?-Laptop computer Q8 - Which of the following media or home entertainment equipment does your household own?-None of the above Q8 - Which of the following media or home entertainment equipment does your household own?-Over-the-air digital TV antenna (for free access to network broadcast without pay TV subscription) Q8 - Which of the following media or home entertainment equipment does your household own?-Portable streaming thumb drive/fob Q8 - Which of the following media or home entertainment equipment does your household own?-Portable video game player Q8 - Which of the following media or home entertainment equipment does your household own?-Smart watch Q8 - Which of the following media or home entertainment equipment does your household own?-Smartphone Q8 - Which of the following media or home entertainment equipment does your household own?-Streaming media box or over-the-top box Q8 - Which of the following media or home entertainment equipment does your household own?-Tablet Q8 - Which of the following media or home entertainment equipment does your household own?-Virtual reality headset
0 0 29 New York Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Employed full-time or part-time 24-29 Northeast ... 1 0 1 0 0 0 1 1 1 0
1 1 29 California Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Employed full-time or part-time 24-29 West ... 1 0 0 1 1 0 1 0 1 0
2 2 29 Colorado Female White or Caucasian (Non-Hispanic) Less than $29,999 No Employed full-time or part-time 24-29 West ... 0 0 0 0 0 0 1 0 0 0
3 3 29 South Carolina Female White or Caucasian (Non-Hispanic) $30,000 to $49,999 Yes Student 24-29 South ... 1 0 1 1 0 0 1 1 1 0
4 4 29 Virginia Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Employed full-time or part-time 24-29 South ... 1 0 0 0 0 1 1 0 1 0
5 5 29 Pennsylvania Male Multiracial $50,000 to $99,999 No Unemployed 24-29 Northeast ... 1 0 1 1 0 1 1 0 0 1
6 6 24 Michigan Female White or Caucasian (Non-Hispanic) Less than $29,999 No Employed full-time or part-time 24-29 Midwest ... 1 0 0 0 0 0 1 1 1 0
7 7 29 New Jersey Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Employed full-time or part-time 24-29 Northeast ... 1 0 1 0 0 0 1 0 0 0
8 9 29 Florida Male Multiracial Less than $29,999 No Student 24-29 South ... 1 0 0 0 0 0 1 0 0 0
9 10 29 New York Female African American $100,000 to $299,999 Yes Employed full-time or part-time 24-29 Northeast ... 1 0 0 0 1 0 0 0 1 1
10 11 29 California Female East Asian or Southeast Asian $50,000 to $99,999 Yes Unemployed 24-29 West ... 1 0 1 1 0 1 1 0 0 0
11 12 29 New Jersey Female Hispanic $50,000 to $99,999 No Employed full-time or part-time 24-29 Northeast ... 1 0 1 1 1 1 1 1 1 1
12 13 29 Maine Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Employed full-time or part-time 24-29 Northeast ... 1 0 0 1 0 0 1 1 1 0
13 14 29 Washington Male East Asian or Southeast Asian $50,000 to $99,999 No Unemployed 24-29 West ... 1 0 0 0 0 0 1 0 0 0
14 15 29 Massachusetts Male East Asian or Southeast Asian $100,000 to $299,999 No Employed full-time or part-time 24-29 Northeast ... 1 0 0 0 0 0 1 0 1 0
15 16 20 Wisconsin Male East Asian or Southeast Asian Less than $29,999 No Student 14-23 Midwest ... 1 0 0 1 1 1 1 0 1 0
16 17 29 Maine Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Employed full-time or part-time 24-29 Northeast ... 1 0 1 0 0 0 1 0 1 0
17 18 26 Florida Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Self-employed 24-29 South ... 1 0 1 0 0 1 1 0 1 0
18 19 29 Tennessee Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Employed full-time or part-time 24-29 South ... 1 0 0 0 1 0 1 0 1 0
19 20 29 Tennessee Male White or Caucasian (Non-Hispanic) Less than $29,999 Yes Student 24-29 South ... 0 0 0 0 1 0 0 1 1 0
20 22 29 Wisconsin Female White or Caucasian (Non-Hispanic) $30,000 to $49,999 Yes Unemployed 24-29 Midwest ... 1 0 0 1 1 1 1 0 1 1
21 23 29 North Carolina Male White or Caucasian (Non-Hispanic) $30,000 to $49,999 No Employed full-time or part-time 24-29 South ... 1 0 1 0 0 0 1 0 1 0
22 24 17 California Male Hispanic $50,000 to $99,999 Yes Student 14-23 West ... 1 0 0 0 1 0 1 0 1 0
23 25 29 Missouri Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Employed full-time or part-time 24-29 Midwest ... 1 0 0 0 0 1 1 0 1 0
24 26 29 Ohio Male White or Caucasian (Non-Hispanic) Less than $29,999 No Employed full-time or part-time 24-29 Midwest ... 1 0 0 0 0 0 1 0 0 0
25 27 29 New Jersey Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Employed full-time or part-time 24-29 Northeast ... 1 0 0 1 1 0 1 1 1 0
26 28 29 Texas Female White or Caucasian (Non-Hispanic) $30,000 to $49,999 No Unemployed 24-29 South ... 1 0 0 0 0 0 1 0 0 0
27 29 24 Arizona Female White or Caucasian (Non-Hispanic) $30,000 to $49,999 No Student 24-29 West ... 1 0 0 0 1 0 1 0 1 0
28 30 29 California Female Hispanic Less than $29,999 Yes Unemployed 24-29 West ... 1 0 0 0 0 0 1 0 1 0
29 31 29 California Female East Asian or Southeast Asian $50,000 to $99,999 No Employed full-time or part-time 24-29 West ... 1 0 1 0 0 0 1 0 1 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
4016 2096 71 South Carolina Male Multiracial $30,000 to $49,999 No Retired 70 or older South ... 0 0 0 0 0 0 0 0 0 0
4017 2097 73 Missouri Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Retired 70 or older Midwest ... 1 0 0 0 0 0 1 0 1 0
4018 2099 73 Florida Male White or Caucasian (Non-Hispanic) Less than $29,999 No Self-employed 70 or older South ... 1 0 0 1 1 1 1 0 1 1
4019 2100 18 Louisiana Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Student 14-19 South ... 0 0 1 1 1 0 1 0 1 0
4020 2101 18 South Dakota Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Unemployed 14-19 Midwest ... 1 0 0 0 0 0 0 0 0 0
4021 2102 15 New York Male White or Caucasian (Non-Hispanic) $100,000 to $299,999 Yes Self-employed 14-19 Northeast ... 1 0 1 1 1 1 1 1 1 1
4022 2103 15 Florida Female White or Caucasian (Non-Hispanic) $100,000 to $299,999 Yes Employed full-time or part-time 14-19 South ... 1 0 1 0 1 0 1 0 0 0
4023 2105 71 Idaho Female White or Caucasian (Non-Hispanic) Less than $29,999 No Retired 70 or older West ... 0 0 0 0 0 0 0 0 0 0
4024 2106 76 Wisconsin Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Retired 70 or older Midwest ... 1 0 0 0 0 0 0 0 0 0
4025 2107 18 California Female Hispanic $30,000 to $49,999 No Student 14-19 West ... 1 0 0 0 1 0 1 0 1 0
4026 2109 73 Pennsylvania Female White or Caucasian (Non-Hispanic) $30,000 to $49,999 No Retired 70 or older Northeast ... 1 0 0 1 0 0 1 0 1 0
4027 2110 71 Virginia Male White or Caucasian (Non-Hispanic) Less than $29,999 No Retired 70 or older South ... 1 0 0 0 0 0 0 0 1 0
4028 2111 77 Michigan Female Multiracial $30,000 to $49,999 No Retired 70 or older Midwest ... 0 0 0 0 0 0 0 0 0 0
4029 2112 18 Texas Male White or Caucasian (Non-Hispanic) $30,000 to $49,999 Yes Student 14-19 South ... 1 0 1 0 0 0 1 0 1 0
4030 2113 71 Alabama Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Retired 70 or older South ... 1 0 0 0 0 0 1 0 1 0
4031 2115 14 Kansas Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Self-employed 14-19 Midwest ... 1 0 0 0 0 1 1 0 1 1
4032 2116 70 Florida Female White or Caucasian (Non-Hispanic) Less than $29,999 No Retired 70 or older South ... 1 0 1 0 0 0 0 0 0 0
4033 2117 19 Maryland Female White or Caucasian (Non-Hispanic) $100,000 to $299,999 No Student 14-19 South ... 1 0 0 0 1 0 1 0 1 0
4034 2118 15 Florida Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Student 14-19 South ... 1 0 0 0 0 0 0 1 1 1
4035 2119 14 Illinois Male White or Caucasian (Non-Hispanic) $100,000 to $299,999 Yes Student 14-19 Midwest ... 0 0 0 0 0 0 0 0 0 0
4036 2120 70 Washington Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Retired 70 or older West ... 1 0 0 0 0 0 0 0 0 0
4037 2121 19 Virginia Female White or Caucasian (Non-Hispanic) Less than $29,999 Yes Unemployed 14-19 South ... 1 0 0 0 1 0 1 0 1 0
4038 2122 18 Florida Male White or Caucasian (Non-Hispanic) $100,000 to $299,999 No Student 14-19 South ... 1 0 0 0 0 0 1 1 1 0
4039 2124 15 Oregon Female White or Caucasian (Non-Hispanic) $100,000 to $299,999 Yes Student 14-19 West ... 1 0 0 0 0 0 1 0 1 0
4040 2125 14 Ohio Male Multiracial Less than $29,999 Yes Student 14-19 Midwest ... 0 0 0 0 0 0 1 0 1 1
4041 2126 14 Oregon Male White or Caucasian (Non-Hispanic) $100,000 to $299,999 Yes Unemployed 14-19 West ... 1 0 0 0 0 0 0 0 1 0
4042 2127 70 Massachusetts Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Retired 70 or older Northeast ... 1 0 1 0 0 0 1 1 1 0
4043 2128 18 Alabama Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Unemployed 14-19 South ... 0 0 0 0 1 0 0 0 1 0
4044 2129 79 Illinois Female White or Caucasian (Non-Hispanic) Less than $29,999 No Retired 70 or older Midwest ... 0 0 0 0 0 0 0 0 0 0
4045 2130 77 Pennsylvania Female Multiracial $30,000 to $49,999 No Retired 70 or older Northeast ... 0 0 1 0 0 0 0 0 0 0

4046 rows × 33 columns

Q26 Subscriptions

In [18]:
# Creating a dataframe consisting of only consumer features and Q26 columns

df_temp = merged_df

Q26_cols = [col for col in merged_df.columns if 'Q26' in col]
Q26_cols


#dataframe with columns for Question 26 
df_temp_3 = df_temp.loc[:, df_temp.columns.isin(Q26_cols)]

#dataframe with columns of consumer feature
df_temp_2=df_temp.loc[:, df_temp.columns.isin(temp)]
df_temp_2['quantity'] = 1


#combining the two dataframes
df_Q_26=pd.concat([df_temp_2 , df_temp_3 ], axis=1)
df_Q_26
Out[18]:
index Age State Gender Ethnicity Household Income Children in Household? Employment status Age range Region ... Q26 - Which of the following subscriptions does your household purchase?-Home internet Q26 - Which of the following subscriptions does your household purchase?-Landline telephone Q26 - Which of the following subscriptions does your household purchase?-Magazine (print or digital) Q26 - Which of the following subscriptions does your household purchase?-Mobile data plan Q26 - Which of the following subscriptions does your household purchase?-Mobile voice (smartphone or basic mobile phone calling plan) Q26 - Which of the following subscriptions does your household purchase?-News/Newspaper (print or digital) Q26 - Which of the following subscriptions does your household purchase?-None of the above Q26 - Which of the following subscriptions does your household purchase?-Pay TV (traditional cable and/or satellite bundle) Q26 - Which of the following subscriptions does your household purchase?-Streaming music service Q26 - Which of the following subscriptions does your household purchase?-Streaming video service
0 0 29 New York Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Employed full-time or part-time 24-29 Northeast ... 0 0 1 0 0 0 0 1 0 0
1 1 29 California Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Employed full-time or part-time 24-29 West ... 1 1 0 1 1 0 0 1 0 1
2 2 29 Colorado Female White or Caucasian (Non-Hispanic) Less than $29,999 No Employed full-time or part-time 24-29 West ... 0 0 0 1 1 0 0 0 1 1
3 3 29 South Carolina Female White or Caucasian (Non-Hispanic) $30,000 to $49,999 Yes Student 24-29 South ... 1 0 0 1 0 0 0 0 0 0
4 4 29 Virginia Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Employed full-time or part-time 24-29 South ... 0 0 0 0 0 0 0 0 0 0
5 5 29 Pennsylvania Male Multiracial $50,000 to $99,999 No Unemployed 24-29 Northeast ... 1 0 1 0 0 0 0 0 0 1
6 6 24 Michigan Female White or Caucasian (Non-Hispanic) Less than $29,999 No Employed full-time or part-time 24-29 Midwest ... 1 0 0 0 0 0 0 1 0 1
7 7 29 New Jersey Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Employed full-time or part-time 24-29 Northeast ... 1 1 1 0 0 1 0 1 0 1
8 9 29 Florida Male Multiracial Less than $29,999 No Student 24-29 South ... 1 0 0 1 0 0 0 1 0 1
9 10 29 New York Female African American $100,000 to $299,999 Yes Employed full-time or part-time 24-29 Northeast ... 0 0 1 1 1 0 0 0 1 1
10 11 29 California Female East Asian or Southeast Asian $50,000 to $99,999 Yes Unemployed 24-29 West ... 1 0 0 1 0 0 0 0 0 0
11 12 29 New Jersey Female Hispanic $50,000 to $99,999 No Employed full-time or part-time 24-29 Northeast ... 1 1 1 1 1 1 0 1 1 1
12 13 29 Maine Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Employed full-time or part-time 24-29 Northeast ... 1 0 0 0 0 1 0 1 1 1
13 14 29 Washington Male East Asian or Southeast Asian $50,000 to $99,999 No Unemployed 24-29 West ... 1 1 0 1 0 0 0 1 0 0
14 15 29 Massachusetts Male East Asian or Southeast Asian $100,000 to $299,999 No Employed full-time or part-time 24-29 Northeast ... 0 0 0 0 0 0 0 1 0 0
15 16 20 Wisconsin Male East Asian or Southeast Asian Less than $29,999 No Student 14-23 Midwest ... 1 0 0 1 0 0 0 1 0 1
16 17 29 Maine Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Employed full-time or part-time 24-29 Northeast ... 1 1 0 1 0 1 0 1 0 1
17 18 26 Florida Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Self-employed 24-29 South ... 1 1 0 1 1 1 0 1 1 1
18 19 29 Tennessee Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Employed full-time or part-time 24-29 South ... 0 0 0 0 0 0 1 0 0 0
19 20 29 Tennessee Male White or Caucasian (Non-Hispanic) Less than $29,999 Yes Student 24-29 South ... 1 0 0 1 0 0 0 1 1 1
20 22 29 Wisconsin Female White or Caucasian (Non-Hispanic) $30,000 to $49,999 Yes Unemployed 24-29 Midwest ... 1 0 1 1 1 0 0 1 1 1
21 23 29 North Carolina Male White or Caucasian (Non-Hispanic) $30,000 to $49,999 No Employed full-time or part-time 24-29 South ... 1 0 0 1 1 0 0 1 0 1
22 24 17 California Male Hispanic $50,000 to $99,999 Yes Student 14-23 West ... 1 1 0 1 1 1 0 1 0 0
23 25 29 Missouri Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Employed full-time or part-time 24-29 Midwest ... 1 0 1 1 0 1 0 1 1 1
24 26 29 Ohio Male White or Caucasian (Non-Hispanic) Less than $29,999 No Employed full-time or part-time 24-29 Midwest ... 1 0 1 0 1 1 0 1 0 0
25 27 29 New Jersey Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Employed full-time or part-time 24-29 Northeast ... 1 1 0 1 1 0 0 1 0 1
26 28 29 Texas Female White or Caucasian (Non-Hispanic) $30,000 to $49,999 No Unemployed 24-29 South ... 0 0 0 0 0 0 1 0 0 0
27 29 24 Arizona Female White or Caucasian (Non-Hispanic) $30,000 to $49,999 No Student 24-29 West ... 1 0 0 0 0 0 0 0 0 1
28 30 29 California Female Hispanic Less than $29,999 Yes Unemployed 24-29 West ... 1 0 0 1 1 0 0 0 1 1
29 31 29 California Female East Asian or Southeast Asian $50,000 to $99,999 No Employed full-time or part-time 24-29 West ... 0 0 0 1 0 0 0 0 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
4016 2096 71 South Carolina Male Multiracial $30,000 to $49,999 No Retired 70 or older South ... 1 1 0 0 0 0 0 1 0 0
4017 2097 73 Missouri Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Retired 70 or older Midwest ... 1 1 1 1 1 0 0 1 0 0
4018 2099 73 Florida Male White or Caucasian (Non-Hispanic) Less than $29,999 No Self-employed 70 or older South ... 1 0 0 1 1 0 0 1 1 1
4019 2100 18 Louisiana Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Student 14-19 South ... 1 0 0 1 1 1 0 0 1 1
4020 2101 18 South Dakota Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Unemployed 14-19 Midwest ... 0 0 1 1 0 0 0 0 0 0
4021 2102 15 New York Male White or Caucasian (Non-Hispanic) $100,000 to $299,999 Yes Self-employed 14-19 Northeast ... 1 1 1 1 1 1 0 1 1 1
4022 2103 15 Florida Female White or Caucasian (Non-Hispanic) $100,000 to $299,999 Yes Employed full-time or part-time 14-19 South ... 0 0 1 1 1 1 0 1 0 1
4023 2105 71 Idaho Female White or Caucasian (Non-Hispanic) Less than $29,999 No Retired 70 or older West ... 0 1 0 0 0 0 0 0 0 0
4024 2106 76 Wisconsin Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Retired 70 or older Midwest ... 1 1 1 1 0 1 0 1 0 0
4025 2107 18 California Female Hispanic $30,000 to $49,999 No Student 14-19 West ... 0 0 0 0 0 0 1 0 0 0
4026 2109 73 Pennsylvania Female White or Caucasian (Non-Hispanic) $30,000 to $49,999 No Retired 70 or older Northeast ... 0 1 0 0 0 0 0 1 0 1
4027 2110 71 Virginia Male White or Caucasian (Non-Hispanic) Less than $29,999 No Retired 70 or older South ... 0 0 0 0 0 0 0 1 0 0
4028 2111 77 Michigan Female Multiracial $30,000 to $49,999 No Retired 70 or older Midwest ... 1 1 0 0 0 0 0 1 0 0
4029 2112 18 Texas Male White or Caucasian (Non-Hispanic) $30,000 to $49,999 Yes Student 14-19 South ... 1 0 0 0 1 0 0 0 0 1
4030 2113 71 Alabama Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Retired 70 or older South ... 1 1 1 1 1 0 0 1 1 1
4031 2115 14 Kansas Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Self-employed 14-19 Midwest ... 1 1 1 0 0 0 0 0 1 1
4032 2116 70 Florida Female White or Caucasian (Non-Hispanic) Less than $29,999 No Retired 70 or older South ... 1 1 0 0 0 0 0 0 0 0
4033 2117 19 Maryland Female White or Caucasian (Non-Hispanic) $100,000 to $299,999 No Student 14-19 South ... 1 1 1 1 0 1 0 1 1 1
4034 2118 15 Florida Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Student 14-19 South ... 1 0 0 1 1 1 0 0 1 0
4035 2119 14 Illinois Male White or Caucasian (Non-Hispanic) $100,000 to $299,999 Yes Student 14-19 Midwest ... 1 1 0 1 1 0 0 1 0 0
4036 2120 70 Washington Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Retired 70 or older West ... 1 1 1 0 0 1 0 1 0 0
4037 2121 19 Virginia Female White or Caucasian (Non-Hispanic) Less than $29,999 Yes Unemployed 14-19 South ... 1 1 0 1 0 0 0 1 1 1
4038 2122 18 Florida Male White or Caucasian (Non-Hispanic) $100,000 to $299,999 No Student 14-19 South ... 1 1 1 1 1 1 0 1 1 1
4039 2124 15 Oregon Female White or Caucasian (Non-Hispanic) $100,000 to $299,999 Yes Student 14-19 West ... 0 0 0 1 0 0 0 1 0 1
4040 2125 14 Ohio Male Multiracial Less than $29,999 Yes Student 14-19 Midwest ... 1 1 0 0 0 0 0 1 0 1
4041 2126 14 Oregon Male White or Caucasian (Non-Hispanic) $100,000 to $299,999 Yes Unemployed 14-19 West ... 1 0 1 0 0 0 0 0 0 0
4042 2127 70 Massachusetts Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Retired 70 or older Northeast ... 1 1 0 0 0 0 0 0 0 0
4043 2128 18 Alabama Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Unemployed 14-19 South ... 1 0 0 0 0 0 0 1 0 0
4044 2129 79 Illinois Female White or Caucasian (Non-Hispanic) Less than $29,999 No Retired 70 or older Midwest ... 1 0 1 0 0 1 0 1 0 0
4045 2130 77 Pennsylvania Female Multiracial $30,000 to $49,999 No Retired 70 or older Northeast ... 1 1 0 0 1 0 0 0 0 0

4046 rows × 24 columns

Q10 Future Products

In [19]:
# Creating a dataframe consisting of only consumer features and Q10 columns

df_temp = merged_df

Q10_cols = [col for col in merged_df.columns if 'Q10' in col]
Q10_cols


#dataframe with columns for Question 10 
df_temp_4 = df_temp.loc[:, df_temp.columns.isin(Q10_cols)]

#dataframe with columns of consumer feature
df_temp_2=df_temp.loc[:, df_temp.columns.isin(temp)]
df_temp_2['quantity'] = 1


#combining the two dataframes
df_Q_10=pd.concat([df_temp_2 , df_temp_4 ], axis=1)
df_Q_10
Out[19]:
index Age State Gender Ethnicity Household Income Children in Household? Employment status Age range Region ... Q10 - Of those products you indicated you do not currently own, which of the following do you plan to purchase in the next 12 months?-Laptop computer Q10 - Of those products you indicated you do not currently own, which of the following do you plan to purchase in the next 12 months?-None of the above Q10 - Of those products you indicated you do not currently own, which of the following do you plan to purchase in the next 12 months?-Over-the-air digital TV antenna (for free access to network broadcast without pay TV subscription) Q10 - Of those products you indicated you do not currently own, which of the following do you plan to purchase in the next 12 months?-Portable streaming thumb drive/fob Q10 - Of those products you indicated you do not currently own, which of the following do you plan to purchase in the next 12 months?-Portable video game player Q10 - Of those products you indicated you do not currently own, which of the following do you plan to purchase in the next 12 months?-Smart watch Q10 - Of those products you indicated you do not currently own, which of the following do you plan to purchase in the next 12 months?-Smartphone Q10 - Of those products you indicated you do not currently own, which of the following do you plan to purchase in the next 12 months?-Streaming media box or over-the-top box Q10 - Of those products you indicated you do not currently own, which of the following do you plan to purchase in the next 12 months?-Tablet Q10 - Of those products you indicated you do not currently own, which of the following do you plan to purchase in the next 12 months?-Virtual reality headset
0 0 29 New York Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Employed full-time or part-time 24-29 Northeast ... 0 0 0 0 0 0 0 0 0 0
1 1 29 California Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Employed full-time or part-time 24-29 West ... 0 0 1 0 0 0 0 0 0 0
2 2 29 Colorado Female White or Caucasian (Non-Hispanic) Less than $29,999 No Employed full-time or part-time 24-29 West ... 0 1 0 0 0 0 0 0 0 0
3 3 29 South Carolina Female White or Caucasian (Non-Hispanic) $30,000 to $49,999 Yes Student 24-29 South ... 0 0 0 0 1 0 0 0 0 0
4 4 29 Virginia Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Employed full-time or part-time 24-29 South ... 0 0 0 0 0 0 0 0 0 0
5 5 29 Pennsylvania Male Multiracial $50,000 to $99,999 No Unemployed 24-29 Northeast ... 0 0 0 0 1 0 0 0 1 0
6 6 24 Michigan Female White or Caucasian (Non-Hispanic) Less than $29,999 No Employed full-time or part-time 24-29 Midwest ... 0 1 0 0 0 0 0 0 0 0
7 7 29 New Jersey Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Employed full-time or part-time 24-29 Northeast ... 0 0 0 0 0 0 0 1 1 0
8 9 29 Florida Male Multiracial Less than $29,999 No Student 24-29 South ... 0 0 0 0 0 0 0 0 0 0
9 10 29 New York Female African American $100,000 to $299,999 Yes Employed full-time or part-time 24-29 Northeast ... 0 0 1 0 0 1 1 1 0 0
10 11 29 California Female East Asian or Southeast Asian $50,000 to $99,999 Yes Unemployed 24-29 West ... 0 0 0 0 0 0 0 0 1 0
11 12 29 New Jersey Female Hispanic $50,000 to $99,999 No Employed full-time or part-time 24-29 Northeast ... 0 1 0 0 0 0 0 0 0 0
12 13 29 Maine Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Employed full-time or part-time 24-29 Northeast ... 0 0 0 0 0 1 0 0 0 0
13 14 29 Washington Male East Asian or Southeast Asian $50,000 to $99,999 No Unemployed 24-29 West ... 0 0 0 0 0 0 0 0 0 0
14 15 29 Massachusetts Male East Asian or Southeast Asian $100,000 to $299,999 No Employed full-time or part-time 24-29 Northeast ... 0 0 0 0 0 0 0 0 0 1
15 16 20 Wisconsin Male East Asian or Southeast Asian Less than $29,999 No Student 14-23 Midwest ... 0 1 0 0 0 0 0 0 0 0
16 17 29 Maine Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Employed full-time or part-time 24-29 Northeast ... 0 0 0 0 0 0 0 1 0 0
17 18 26 Florida Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Self-employed 24-29 South ... 0 0 0 1 0 0 0 1 0 1
18 19 29 Tennessee Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Employed full-time or part-time 24-29 South ... 0 1 0 0 0 0 0 0 0 0
19 20 29 Tennessee Male White or Caucasian (Non-Hispanic) Less than $29,999 Yes Student 24-29 South ... 0 0 0 0 0 0 1 0 0 0
20 22 29 Wisconsin Female White or Caucasian (Non-Hispanic) $30,000 to $49,999 Yes Unemployed 24-29 Midwest ... 0 0 0 0 0 0 0 0 0 0
21 23 29 North Carolina Male White or Caucasian (Non-Hispanic) $30,000 to $49,999 No Employed full-time or part-time 24-29 South ... 0 0 0 0 0 1 0 1 0 0
22 24 17 California Male Hispanic $50,000 to $99,999 Yes Student 14-23 West ... 0 0 0 0 0 0 0 1 0 0
23 25 29 Missouri Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Employed full-time or part-time 24-29 Midwest ... 0 0 0 0 0 0 0 0 0 1
24 26 29 Ohio Male White or Caucasian (Non-Hispanic) Less than $29,999 No Employed full-time or part-time 24-29 Midwest ... 0 0 0 0 0 0 0 0 1 0
25 27 29 New Jersey Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Employed full-time or part-time 24-29 Northeast ... 0 0 0 0 0 0 0 0 0 0
26 28 29 Texas Female White or Caucasian (Non-Hispanic) $30,000 to $49,999 No Unemployed 24-29 South ... 0 1 0 0 0 0 0 0 0 0
27 29 24 Arizona Female White or Caucasian (Non-Hispanic) $30,000 to $49,999 No Student 24-29 West ... 0 0 0 0 0 0 0 0 0 0
28 30 29 California Female Hispanic Less than $29,999 Yes Unemployed 24-29 West ... 0 1 0 0 0 0 0 0 0 0
29 31 29 California Female East Asian or Southeast Asian $50,000 to $99,999 No Employed full-time or part-time 24-29 West ... 0 1 0 0 0 0 0 0 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
4016 2096 71 South Carolina Male Multiracial $30,000 to $49,999 No Retired 70 or older South ... 0 1 0 0 0 0 0 0 0 0
4017 2097 73 Missouri Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Retired 70 or older Midwest ... 0 1 0 0 0 0 0 0 0 0
4018 2099 73 Florida Male White or Caucasian (Non-Hispanic) Less than $29,999 No Self-employed 70 or older South ... 0 0 0 0 0 0 0 0 0 0
4019 2100 18 Louisiana Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Student 14-19 South ... 1 0 0 0 0 0 0 0 0 0
4020 2101 18 South Dakota Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Unemployed 14-19 Midwest ... 0 1 0 0 0 0 0 0 0 0
4021 2102 15 New York Male White or Caucasian (Non-Hispanic) $100,000 to $299,999 Yes Self-employed 14-19 Northeast ... 0 1 0 0 0 0 0 0 0 0
4022 2103 15 Florida Female White or Caucasian (Non-Hispanic) $100,000 to $299,999 Yes Employed full-time or part-time 14-19 South ... 0 0 0 0 0 1 0 0 1 0
4023 2105 71 Idaho Female White or Caucasian (Non-Hispanic) Less than $29,999 No Retired 70 or older West ... 0 1 0 0 0 0 0 0 0 0
4024 2106 76 Wisconsin Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Retired 70 or older Midwest ... 0 0 0 0 0 0 1 0 1 0
4025 2107 18 California Female Hispanic $30,000 to $49,999 No Student 14-19 West ... 0 0 0 0 0 1 0 0 0 1
4026 2109 73 Pennsylvania Female White or Caucasian (Non-Hispanic) $30,000 to $49,999 No Retired 70 or older Northeast ... 0 0 1 0 1 1 0 1 0 1
4027 2110 71 Virginia Male White or Caucasian (Non-Hispanic) Less than $29,999 No Retired 70 or older South ... 0 1 0 0 0 0 0 0 0 0
4028 2111 77 Michigan Female Multiracial $30,000 to $49,999 No Retired 70 or older Midwest ... 0 0 0 0 0 0 0 0 0 0
4029 2112 18 Texas Male White or Caucasian (Non-Hispanic) $30,000 to $49,999 Yes Student 14-19 South ... 0 0 0 0 0 0 0 0 0 0
4030 2113 71 Alabama Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Retired 70 or older South ... 0 0 0 0 0 0 0 1 0 0
4031 2115 14 Kansas Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Self-employed 14-19 Midwest ... 0 0 1 1 1 0 0 0 0 0
4032 2116 70 Florida Female White or Caucasian (Non-Hispanic) Less than $29,999 No Retired 70 or older South ... 0 1 0 0 0 0 0 0 0 0
4033 2117 19 Maryland Female White or Caucasian (Non-Hispanic) $100,000 to $299,999 No Student 14-19 South ... 0 0 0 1 0 0 0 0 0 0
4034 2118 15 Florida Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Student 14-19 South ... 0 0 0 0 1 1 1 0 0 0
4035 2119 14 Illinois Male White or Caucasian (Non-Hispanic) $100,000 to $299,999 Yes Student 14-19 Midwest ... 0 0 0 0 0 1 1 0 0 0
4036 2120 70 Washington Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Retired 70 or older West ... 0 1 0 0 0 0 0 0 0 0
4037 2121 19 Virginia Female White or Caucasian (Non-Hispanic) Less than $29,999 Yes Unemployed 14-19 South ... 0 0 0 0 0 1 0 0 0 0
4038 2122 18 Florida Male White or Caucasian (Non-Hispanic) $100,000 to $299,999 No Student 14-19 South ... 0 1 0 0 0 0 0 0 0 0
4039 2124 15 Oregon Female White or Caucasian (Non-Hispanic) $100,000 to $299,999 Yes Student 14-19 West ... 0 0 0 0 0 0 0 0 0 0
4040 2125 14 Ohio Male Multiracial Less than $29,999 Yes Student 14-19 Midwest ... 1 0 0 0 0 0 0 0 0 0
4041 2126 14 Oregon Male White or Caucasian (Non-Hispanic) $100,000 to $299,999 Yes Unemployed 14-19 West ... 0 0 1 1 1 1 1 0 0 0
4042 2127 70 Massachusetts Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Retired 70 or older Northeast ... 0 1 0 0 0 0 0 0 0 0
4043 2128 18 Alabama Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Unemployed 14-19 South ... 1 0 0 0 0 0 0 0 0 1
4044 2129 79 Illinois Female White or Caucasian (Non-Hispanic) Less than $29,999 No Retired 70 or older Midwest ... 0 1 0 0 0 0 0 0 0 0
4045 2130 77 Pennsylvania Female Multiracial $30,000 to $49,999 No Retired 70 or older Northeast ... 0 1 0 0 0 0 0 0 0 0

4046 rows × 33 columns

In [20]:
#introduce a column for products to capture all the products owned by the consumer
df_Q_8['products'] = 0
 
    
#travering through the survey enteries of each consumer to create a string of products that each consumer owns
#Ignoring the columns where products owned = Don't know or None of the above

for index , row in df_Q_8.iterrows():  
    list_products = []
    if(df_Q_8.iloc[index,12] == 1):
        list_products.append("Basic mobile phone")
    if(df_Q_8.iloc[index,13] == 1):
        list_products.append('Blu-ray disc player/DVD player')    
    if(df_Q_8.iloc[index,14] == 1):
        list_products.append('Computer network/router in your home for wireless computer/laptop usage')  
    if(df_Q_8.iloc[index,15] == 1):
        list_products.append("Dedicated e-book reader")
    if(df_Q_8.iloc[index,16] == 1):
        list_products.append("Desktop computer") 
    if(df_Q_8.iloc[index,17] == 1):
        list_products.append("Digital video recorder")  
    if(df_Q_8.iloc[index,19] == 1):
        list_products.append("Drone")  
    if(df_Q_8.iloc[index,20] == 1):
        list_products.append("Fitness band")   
    if(df_Q_8.iloc[index,21] == 1):
        list_products.append("Flat panel television")   
    if(df_Q_8.iloc[index,22] == 1):
        list_products.append("Gaming console")   
    if(df_Q_8.iloc[index,23] == 1):
        list_products.append("Laptop computer")   
    if(df_Q_8.iloc[index,25] == 1):
        list_products.append("Over-the-air digital TV antenna ")   
    if(df_Q_8.iloc[index,26] == 1):
        list_products.append("Portable streaming thumb drive/fob")   
    if(df_Q_8.iloc[index,27] == 1):
        list_products.append("Portable video game player")  
    if(df_Q_8.iloc[index,28] == 1):
        list_products.append("Smart watch")  
    if(df_Q_8.iloc[index,29] == 1):
        list_products.append("Smartphone")  
    if(df_Q_8.iloc[index,30] == 1):
        list_products.append("Streaming media box or over-the-top box")  
    if(df_Q_8.iloc[index,31] == 1):
        list_products.append("Tablet")
    if(df_Q_8.iloc[index,32] == 1):
        list_products.append("Virtual reality headset") 
   
    listToStr = ','.join([str(elem) for elem in list_products])
                    
    df_Q_8.iloc[index,33] = listToStr
    

df_Q_8
Out[20]:
index Age State Gender Ethnicity Household Income Children in Household? Employment status Age range Region ... Q8 - Which of the following media or home entertainment equipment does your household own?-None of the above Q8 - Which of the following media or home entertainment equipment does your household own?-Over-the-air digital TV antenna (for free access to network broadcast without pay TV subscription) Q8 - Which of the following media or home entertainment equipment does your household own?-Portable streaming thumb drive/fob Q8 - Which of the following media or home entertainment equipment does your household own?-Portable video game player Q8 - Which of the following media or home entertainment equipment does your household own?-Smart watch Q8 - Which of the following media or home entertainment equipment does your household own?-Smartphone Q8 - Which of the following media or home entertainment equipment does your household own?-Streaming media box or over-the-top box Q8 - Which of the following media or home entertainment equipment does your household own?-Tablet Q8 - Which of the following media or home entertainment equipment does your household own?-Virtual reality headset products
0 0 29 New York Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Employed full-time or part-time 24-29 Northeast ... 0 1 0 0 0 1 1 1 0 Computer network/router in your home for wirel...
1 1 29 California Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Employed full-time or part-time 24-29 West ... 0 0 1 1 0 1 0 1 0 Blu-ray disc player/DVD player,Desktop compute...
2 2 29 Colorado Female White or Caucasian (Non-Hispanic) Less than $29,999 No Employed full-time or part-time 24-29 West ... 0 0 0 0 0 1 0 0 0 Blu-ray disc player/DVD player,Computer networ...
3 3 29 South Carolina Female White or Caucasian (Non-Hispanic) $30,000 to $49,999 Yes Student 24-29 South ... 0 1 1 0 0 1 1 1 0 Computer network/router in your home for wirel...
4 4 29 Virginia Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Employed full-time or part-time 24-29 South ... 0 0 0 0 1 1 0 1 0 Blu-ray disc player/DVD player,Desktop compute...
5 5 29 Pennsylvania Male Multiracial $50,000 to $99,999 No Unemployed 24-29 Northeast ... 0 1 1 0 1 1 0 0 1 Basic mobile phone,Blu-ray disc player/DVD pla...
6 6 24 Michigan Female White or Caucasian (Non-Hispanic) Less than $29,999 No Employed full-time or part-time 24-29 Midwest ... 0 0 0 0 0 1 1 1 0 Computer network/router in your home for wirel...
7 7 29 New Jersey Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Employed full-time or part-time 24-29 Northeast ... 0 1 0 0 0 1 0 0 0 Blu-ray disc player/DVD player,Computer networ...
8 9 29 Florida Male Multiracial Less than $29,999 No Student 24-29 South ... 0 0 0 0 0 1 0 0 0 Basic mobile phone,Computer network/router in ...
9 10 29 New York Female African American $100,000 to $299,999 Yes Employed full-time or part-time 24-29 Northeast ... 0 0 0 1 0 0 0 1 1 Basic mobile phone,Blu-ray disc player/DVD pla...
10 11 29 California Female East Asian or Southeast Asian $50,000 to $99,999 Yes Unemployed 24-29 West ... 0 1 1 0 1 1 0 0 0 Basic mobile phone,Blu-ray disc player/DVD pla...
11 12 29 New Jersey Female Hispanic $50,000 to $99,999 No Employed full-time or part-time 24-29 Northeast ... 0 1 1 1 1 1 1 1 1 Basic mobile phone,Blu-ray disc player/DVD pla...
12 13 29 Maine Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Employed full-time or part-time 24-29 Northeast ... 0 0 1 0 0 1 1 1 0 Blu-ray disc player/DVD player,Flat panel tele...
13 14 29 Washington Male East Asian or Southeast Asian $50,000 to $99,999 No Unemployed 24-29 West ... 0 0 0 0 0 1 0 0 0 Gaming console,Laptop computer,Smartphone
14 15 29 Massachusetts Male East Asian or Southeast Asian $100,000 to $299,999 No Employed full-time or part-time 24-29 Northeast ... 0 0 0 0 0 1 0 1 0 Desktop computer,Fitness band,Gaming console,L...
15 16 20 Wisconsin Male East Asian or Southeast Asian Less than $29,999 No Student 14-23 Midwest ... 0 0 1 1 1 1 0 1 0 Computer network/router in your home for wirel...
16 17 29 Maine Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Employed full-time or part-time 24-29 Northeast ... 0 1 0 0 0 1 0 1 0 Blu-ray disc player/DVD player,Computer networ...
17 18 26 Florida Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Self-employed 24-29 South ... 0 1 0 0 1 1 0 1 0 Blu-ray disc player/DVD player,Desktop compute...
18 19 29 Tennessee Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Employed full-time or part-time 24-29 South ... 0 0 0 1 0 1 0 1 0 Computer network/router in your home for wirel...
19 20 29 Tennessee Male White or Caucasian (Non-Hispanic) Less than $29,999 Yes Student 24-29 South ... 0 0 0 1 0 0 1 1 0 Basic mobile phone,Blu-ray disc player/DVD pla...
20 22 29 Wisconsin Female White or Caucasian (Non-Hispanic) $30,000 to $49,999 Yes Unemployed 24-29 Midwest ... 0 0 1 1 1 1 0 1 1 Blu-ray disc player/DVD player,Computer networ...
21 23 29 North Carolina Male White or Caucasian (Non-Hispanic) $30,000 to $49,999 No Employed full-time or part-time 24-29 South ... 0 1 0 0 0 1 0 1 0 Blu-ray disc player/DVD player,Computer networ...
22 24 17 California Male Hispanic $50,000 to $99,999 Yes Student 14-23 West ... 0 0 0 1 0 1 0 1 0 Blu-ray disc player/DVD player,Computer networ...
23 25 29 Missouri Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Employed full-time or part-time 24-29 Midwest ... 0 0 0 0 1 1 0 1 0 Blu-ray disc player/DVD player,Desktop compute...
24 26 29 Ohio Male White or Caucasian (Non-Hispanic) Less than $29,999 No Employed full-time or part-time 24-29 Midwest ... 0 0 0 0 0 1 0 0 0 Digital video recorder,Flat panel television,G...
25 27 29 New Jersey Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Employed full-time or part-time 24-29 Northeast ... 0 0 1 1 0 1 1 1 0 Computer network/router in your home for wirel...
26 28 29 Texas Female White or Caucasian (Non-Hispanic) $30,000 to $49,999 No Unemployed 24-29 South ... 0 0 0 0 0 1 0 0 0 Gaming console,Laptop computer,Smartphone
27 29 24 Arizona Female White or Caucasian (Non-Hispanic) $30,000 to $49,999 No Student 24-29 West ... 0 0 0 1 0 1 0 1 0 Blu-ray disc player/DVD player,Computer networ...
28 30 29 California Female Hispanic Less than $29,999 Yes Unemployed 24-29 West ... 0 0 0 0 0 1 0 1 0 Blu-ray disc player/DVD player,Fitness band,Fl...
29 31 29 California Female East Asian or Southeast Asian $50,000 to $99,999 No Employed full-time or part-time 24-29 West ... 0 1 0 0 0 1 0 1 0 Computer network/router in your home for wirel...
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
4016 2096 71 South Carolina Male Multiracial $30,000 to $49,999 No Retired 70 or older South ... 0 0 0 0 0 0 0 0 0 Blu-ray disc player/DVD player,Desktop compute...
4017 2097 73 Missouri Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Retired 70 or older Midwest ... 0 0 0 0 0 1 0 1 0 Computer network/router in your home for wirel...
4018 2099 73 Florida Male White or Caucasian (Non-Hispanic) Less than $29,999 No Self-employed 70 or older South ... 0 0 1 1 1 1 0 1 1 Blu-ray disc player/DVD player,Flat panel tele...
4019 2100 18 Louisiana Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Student 14-19 South ... 0 1 1 1 0 1 0 1 0 Blu-ray disc player/DVD player,Computer networ...
4020 2101 18 South Dakota Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Unemployed 14-19 Midwest ... 0 0 0 0 0 0 0 0 0 Gaming console,Laptop computer
4021 2102 15 New York Male White or Caucasian (Non-Hispanic) $100,000 to $299,999 Yes Self-employed 14-19 Northeast ... 0 1 1 1 1 1 1 1 1 Blu-ray disc player/DVD player,Computer networ...
4022 2103 15 Florida Female White or Caucasian (Non-Hispanic) $100,000 to $299,999 Yes Employed full-time or part-time 14-19 South ... 0 1 0 1 0 1 0 0 0 Blu-ray disc player/DVD player,Computer networ...
4023 2105 71 Idaho Female White or Caucasian (Non-Hispanic) Less than $29,999 No Retired 70 or older West ... 0 0 0 0 0 0 0 0 0 Basic mobile phone,Blu-ray disc player/DVD pla...
4024 2106 76 Wisconsin Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Retired 70 or older Midwest ... 0 0 0 0 0 0 0 0 0 Basic mobile phone,Blu-ray disc player/DVD pla...
4025 2107 18 California Female Hispanic $30,000 to $49,999 No Student 14-19 West ... 0 0 0 1 0 1 0 1 0 Blu-ray disc player/DVD player,Desktop compute...
4026 2109 73 Pennsylvania Female White or Caucasian (Non-Hispanic) $30,000 to $49,999 No Retired 70 or older Northeast ... 0 0 1 0 0 1 0 1 0 Blu-ray disc player/DVD player,Computer networ...
4027 2110 71 Virginia Male White or Caucasian (Non-Hispanic) Less than $29,999 No Retired 70 or older South ... 0 0 0 0 0 0 0 1 0 Basic mobile phone,Blu-ray disc player/DVD pla...
4028 2111 77 Michigan Female Multiracial $30,000 to $49,999 No Retired 70 or older Midwest ... 0 0 0 0 0 0 0 0 0 Basic mobile phone,Computer network/router in ...
4029 2112 18 Texas Male White or Caucasian (Non-Hispanic) $30,000 to $49,999 Yes Student 14-19 South ... 0 1 0 0 0 1 0 1 0 Blu-ray disc player/DVD player,Computer networ...
4030 2113 71 Alabama Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Retired 70 or older South ... 0 0 0 0 0 1 0 1 0 Blu-ray disc player/DVD player,Computer networ...
4031 2115 14 Kansas Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Self-employed 14-19 Midwest ... 0 0 0 0 1 1 0 1 1 Blu-ray disc player/DVD player,Computer networ...
4032 2116 70 Florida Female White or Caucasian (Non-Hispanic) Less than $29,999 No Retired 70 or older South ... 0 1 0 0 0 0 0 0 0 Basic mobile phone,Computer network/router in ...
4033 2117 19 Maryland Female White or Caucasian (Non-Hispanic) $100,000 to $299,999 No Student 14-19 South ... 0 0 0 1 0 1 0 1 0 Computer network/router in your home for wirel...
4034 2118 15 Florida Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Student 14-19 South ... 0 0 0 0 0 0 1 1 1 Blu-ray disc player/DVD player,Digital video r...
4035 2119 14 Illinois Male White or Caucasian (Non-Hispanic) $100,000 to $299,999 Yes Student 14-19 Midwest ... 0 0 0 0 0 0 0 0 0 Dedicated e-book reader,Digital video recorder...
4036 2120 70 Washington Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Retired 70 or older West ... 0 0 0 0 0 0 0 0 0 Blu-ray disc player/DVD player,Computer networ...
4037 2121 19 Virginia Female White or Caucasian (Non-Hispanic) Less than $29,999 Yes Unemployed 14-19 South ... 0 0 0 1 0 1 0 1 0 Blu-ray disc player/DVD player,Dedicated e-boo...
4038 2122 18 Florida Male White or Caucasian (Non-Hispanic) $100,000 to $299,999 No Student 14-19 South ... 0 0 0 0 0 1 1 1 0 Computer network/router in your home for wirel...
4039 2124 15 Oregon Female White or Caucasian (Non-Hispanic) $100,000 to $299,999 Yes Student 14-19 West ... 0 0 0 0 0 1 0 1 0 Computer network/router in your home for wirel...
4040 2125 14 Ohio Male Multiracial Less than $29,999 Yes Student 14-19 Midwest ... 0 0 0 0 0 1 0 1 1 Flat panel television,Smartphone,Tablet,Virtua...
4041 2126 14 Oregon Male White or Caucasian (Non-Hispanic) $100,000 to $299,999 Yes Unemployed 14-19 West ... 0 0 0 0 0 0 0 1 0 Blu-ray disc player/DVD player,Computer networ...
4042 2127 70 Massachusetts Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Retired 70 or older Northeast ... 0 1 0 0 0 1 1 1 0 Blu-ray disc player/DVD player,Computer networ...
4043 2128 18 Alabama Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Unemployed 14-19 South ... 0 0 0 1 0 0 0 1 0 Basic mobile phone,Computer network/router in ...
4044 2129 79 Illinois Female White or Caucasian (Non-Hispanic) Less than $29,999 No Retired 70 or older Midwest ... 0 0 0 0 0 0 0 0 0 Basic mobile phone,Desktop computer,Flat panel...
4045 2130 77 Pennsylvania Female Multiracial $30,000 to $49,999 No Retired 70 or older Northeast ... 0 1 0 0 0 0 0 0 0 Basic mobile phone,Blu-ray disc player/DVD pla...

4046 rows × 34 columns

In [21]:
#introduce a column for subscriptions to capture all the subscriptions purchased by the consumer
df_Q_26['subscriptions'] = 0

#travering through the survey enteries of each consumer to create a string of subscriptions that each consumer owns
#Ignoring the columns where subscriptions owned = Don't know or None of the above

for index , row in df_Q_26.iterrows():  
    list_subscription = []
    if(df_Q_26.iloc[index,13] == 1):
        list_subscription.append("Gaming")
    if(df_Q_26.iloc[index,14] == 1):
        list_subscription.append("Home Internet")
    if(df_Q_26.iloc[index,15] == 1):
        list_subscription.append("Landline telephone")  
    if(df_Q_26.iloc[index,16] == 1):
        list_subscription.append("Magazine(print or digital)")
    if(df_Q_26.iloc[index,17] == 1):
        list_subscription.append("Mobile data plan")
    if(df_Q_26.iloc[index,18] == 1):
        list_subscription.append("Mobile voice")    
    if(df_Q_26.iloc[index,19] == 1):
        list_subscription.append("News/Newspaper")  
    if(df_Q_26.iloc[index,21] == 1):
        list_subscription.append("Pay TV")  
    if(df_Q_26.iloc[index,22] == 1):
        list_subscription.append("Streaming music service")  
    if(df_Q_26.iloc[index,23] == 1):
        list_subscription.append("Streaming video service")  
        
    listToString = ','.join([str(elem) for elem in list_subscription])
                    
    df_Q_26.iloc[index,24] = listToString
    
df_Q_26  
Out[21]:
index Age State Gender Ethnicity Household Income Children in Household? Employment status Age range Region ... Q26 - Which of the following subscriptions does your household purchase?-Landline telephone Q26 - Which of the following subscriptions does your household purchase?-Magazine (print or digital) Q26 - Which of the following subscriptions does your household purchase?-Mobile data plan Q26 - Which of the following subscriptions does your household purchase?-Mobile voice (smartphone or basic mobile phone calling plan) Q26 - Which of the following subscriptions does your household purchase?-News/Newspaper (print or digital) Q26 - Which of the following subscriptions does your household purchase?-None of the above Q26 - Which of the following subscriptions does your household purchase?-Pay TV (traditional cable and/or satellite bundle) Q26 - Which of the following subscriptions does your household purchase?-Streaming music service Q26 - Which of the following subscriptions does your household purchase?-Streaming video service subscriptions
0 0 29 New York Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Employed full-time or part-time 24-29 Northeast ... 0 1 0 0 0 0 1 0 0 Magazine(print or digital),Pay TV
1 1 29 California Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Employed full-time or part-time 24-29 West ... 1 0 1 1 0 0 1 0 1 Home Internet,Landline telephone,Mobile data p...
2 2 29 Colorado Female White or Caucasian (Non-Hispanic) Less than $29,999 No Employed full-time or part-time 24-29 West ... 0 0 1 1 0 0 0 1 1 Mobile data plan,Mobile voice,Streaming music ...
3 3 29 South Carolina Female White or Caucasian (Non-Hispanic) $30,000 to $49,999 Yes Student 24-29 South ... 0 0 1 0 0 0 0 0 0 Home Internet,Mobile data plan
4 4 29 Virginia Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Employed full-time or part-time 24-29 South ... 0 0 0 0 0 0 0 0 0 Gaming
5 5 29 Pennsylvania Male Multiracial $50,000 to $99,999 No Unemployed 24-29 Northeast ... 0 1 0 0 0 0 0 0 1 Gaming,Home Internet,Magazine(print or digital...
6 6 24 Michigan Female White or Caucasian (Non-Hispanic) Less than $29,999 No Employed full-time or part-time 24-29 Midwest ... 0 0 0 0 0 0 1 0 1 Gaming,Home Internet,Pay TV,Streaming video se...
7 7 29 New Jersey Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Employed full-time or part-time 24-29 Northeast ... 1 1 0 0 1 0 1 0 1 Home Internet,Landline telephone,Magazine(prin...
8 9 29 Florida Male Multiracial Less than $29,999 No Student 24-29 South ... 0 0 1 0 0 0 1 0 1 Home Internet,Mobile data plan,Pay TV,Streamin...
9 10 29 New York Female African American $100,000 to $299,999 Yes Employed full-time or part-time 24-29 Northeast ... 0 1 1 1 0 0 0 1 1 Magazine(print or digital),Mobile data plan,Mo...
10 11 29 California Female East Asian or Southeast Asian $50,000 to $99,999 Yes Unemployed 24-29 West ... 0 0 1 0 0 0 0 0 0 Home Internet,Mobile data plan
11 12 29 New Jersey Female Hispanic $50,000 to $99,999 No Employed full-time or part-time 24-29 Northeast ... 1 1 1 1 1 0 1 1 1 Gaming,Home Internet,Landline telephone,Magazi...
12 13 29 Maine Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Employed full-time or part-time 24-29 Northeast ... 0 0 0 0 1 0 1 1 1 Home Internet,News/Newspaper,Pay TV,Streaming ...
13 14 29 Washington Male East Asian or Southeast Asian $50,000 to $99,999 No Unemployed 24-29 West ... 1 0 1 0 0 0 1 0 0 Home Internet,Landline telephone,Mobile data p...
14 15 29 Massachusetts Male East Asian or Southeast Asian $100,000 to $299,999 No Employed full-time or part-time 24-29 Northeast ... 0 0 0 0 0 0 1 0 0 Pay TV
15 16 20 Wisconsin Male East Asian or Southeast Asian Less than $29,999 No Student 14-23 Midwest ... 0 0 1 0 0 0 1 0 1 Home Internet,Mobile data plan,Pay TV,Streamin...
16 17 29 Maine Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Employed full-time or part-time 24-29 Northeast ... 1 0 1 0 1 0 1 0 1 Gaming,Home Internet,Landline telephone,Mobile...
17 18 26 Florida Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Self-employed 24-29 South ... 1 0 1 1 1 0 1 1 1 Gaming,Home Internet,Landline telephone,Mobile...
18 19 29 Tennessee Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Employed full-time or part-time 24-29 South ... 0 0 0 0 0 1 0 0 0
19 20 29 Tennessee Male White or Caucasian (Non-Hispanic) Less than $29,999 Yes Student 24-29 South ... 0 0 1 0 0 0 1 1 1 Gaming,Home Internet,Mobile data plan,Pay TV,S...
20 22 29 Wisconsin Female White or Caucasian (Non-Hispanic) $30,000 to $49,999 Yes Unemployed 24-29 Midwest ... 0 1 1 1 0 0 1 1 1 Gaming,Home Internet,Magazine(print or digital...
21 23 29 North Carolina Male White or Caucasian (Non-Hispanic) $30,000 to $49,999 No Employed full-time or part-time 24-29 South ... 0 0 1 1 0 0 1 0 1 Gaming,Home Internet,Mobile data plan,Mobile v...
22 24 17 California Male Hispanic $50,000 to $99,999 Yes Student 14-23 West ... 1 0 1 1 1 0 1 0 0 Home Internet,Landline telephone,Mobile data p...
23 25 29 Missouri Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Employed full-time or part-time 24-29 Midwest ... 0 1 1 0 1 0 1 1 1 Home Internet,Magazine(print or digital),Mobil...
24 26 29 Ohio Male White or Caucasian (Non-Hispanic) Less than $29,999 No Employed full-time or part-time 24-29 Midwest ... 0 1 0 1 1 0 1 0 0 Gaming,Home Internet,Magazine(print or digital...
25 27 29 New Jersey Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Employed full-time or part-time 24-29 Northeast ... 1 0 1 1 0 0 1 0 1 Home Internet,Landline telephone,Mobile data p...
26 28 29 Texas Female White or Caucasian (Non-Hispanic) $30,000 to $49,999 No Unemployed 24-29 South ... 0 0 0 0 0 1 0 0 0
27 29 24 Arizona Female White or Caucasian (Non-Hispanic) $30,000 to $49,999 No Student 24-29 West ... 0 0 0 0 0 0 0 0 1 Home Internet,Streaming video service
28 30 29 California Female Hispanic Less than $29,999 Yes Unemployed 24-29 West ... 0 0 1 1 0 0 0 1 1 Home Internet,Mobile data plan,Mobile voice,St...
29 31 29 California Female East Asian or Southeast Asian $50,000 to $99,999 No Employed full-time or part-time 24-29 West ... 0 0 1 0 0 0 0 0 0 Mobile data plan
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
4016 2096 71 South Carolina Male Multiracial $30,000 to $49,999 No Retired 70 or older South ... 1 0 0 0 0 0 1 0 0 Home Internet,Landline telephone,Pay TV
4017 2097 73 Missouri Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Retired 70 or older Midwest ... 1 1 1 1 0 0 1 0 0 Home Internet,Landline telephone,Magazine(prin...
4018 2099 73 Florida Male White or Caucasian (Non-Hispanic) Less than $29,999 No Self-employed 70 or older South ... 0 0 1 1 0 0 1 1 1 Gaming,Home Internet,Mobile data plan,Mobile v...
4019 2100 18 Louisiana Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Student 14-19 South ... 0 0 1 1 1 0 0 1 1 Gaming,Home Internet,Mobile data plan,Mobile v...
4020 2101 18 South Dakota Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Unemployed 14-19 Midwest ... 0 1 1 0 0 0 0 0 0 Magazine(print or digital),Mobile data plan
4021 2102 15 New York Male White or Caucasian (Non-Hispanic) $100,000 to $299,999 Yes Self-employed 14-19 Northeast ... 1 1 1 1 1 0 1 1 1 Gaming,Home Internet,Landline telephone,Magazi...
4022 2103 15 Florida Female White or Caucasian (Non-Hispanic) $100,000 to $299,999 Yes Employed full-time or part-time 14-19 South ... 0 1 1 1 1 0 1 0 1 Magazine(print or digital),Mobile data plan,Mo...
4023 2105 71 Idaho Female White or Caucasian (Non-Hispanic) Less than $29,999 No Retired 70 or older West ... 1 0 0 0 0 0 0 0 0 Landline telephone
4024 2106 76 Wisconsin Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Retired 70 or older Midwest ... 1 1 1 0 1 0 1 0 0 Home Internet,Landline telephone,Magazine(prin...
4025 2107 18 California Female Hispanic $30,000 to $49,999 No Student 14-19 West ... 0 0 0 0 0 1 0 0 0
4026 2109 73 Pennsylvania Female White or Caucasian (Non-Hispanic) $30,000 to $49,999 No Retired 70 or older Northeast ... 1 0 0 0 0 0 1 0 1 Landline telephone,Pay TV,Streaming video service
4027 2110 71 Virginia Male White or Caucasian (Non-Hispanic) Less than $29,999 No Retired 70 or older South ... 0 0 0 0 0 0 1 0 0 Pay TV
4028 2111 77 Michigan Female Multiracial $30,000 to $49,999 No Retired 70 or older Midwest ... 1 0 0 0 0 0 1 0 0 Home Internet,Landline telephone,Pay TV
4029 2112 18 Texas Male White or Caucasian (Non-Hispanic) $30,000 to $49,999 Yes Student 14-19 South ... 0 0 0 1 0 0 0 0 1 Home Internet,Mobile voice,Streaming video ser...
4030 2113 71 Alabama Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Retired 70 or older South ... 1 1 1 1 0 0 1 1 1 Home Internet,Landline telephone,Magazine(prin...
4031 2115 14 Kansas Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Self-employed 14-19 Midwest ... 1 1 0 0 0 0 0 1 1 Gaming,Home Internet,Landline telephone,Magazi...
4032 2116 70 Florida Female White or Caucasian (Non-Hispanic) Less than $29,999 No Retired 70 or older South ... 1 0 0 0 0 0 0 0 0 Home Internet,Landline telephone
4033 2117 19 Maryland Female White or Caucasian (Non-Hispanic) $100,000 to $299,999 No Student 14-19 South ... 1 1 1 0 1 0 1 1 1 Home Internet,Landline telephone,Magazine(prin...
4034 2118 15 Florida Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Student 14-19 South ... 0 0 1 1 1 0 0 1 0 Home Internet,Mobile data plan,Mobile voice,Ne...
4035 2119 14 Illinois Male White or Caucasian (Non-Hispanic) $100,000 to $299,999 Yes Student 14-19 Midwest ... 1 0 1 1 0 0 1 0 0 Home Internet,Landline telephone,Mobile data p...
4036 2120 70 Washington Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Retired 70 or older West ... 1 1 0 0 1 0 1 0 0 Home Internet,Landline telephone,Magazine(prin...
4037 2121 19 Virginia Female White or Caucasian (Non-Hispanic) Less than $29,999 Yes Unemployed 14-19 South ... 1 0 1 0 0 0 1 1 1 Home Internet,Landline telephone,Mobile data p...
4038 2122 18 Florida Male White or Caucasian (Non-Hispanic) $100,000 to $299,999 No Student 14-19 South ... 1 1 1 1 1 0 1 1 1 Gaming,Home Internet,Landline telephone,Magazi...
4039 2124 15 Oregon Female White or Caucasian (Non-Hispanic) $100,000 to $299,999 Yes Student 14-19 West ... 0 0 1 0 0 0 1 0 1 Gaming,Mobile data plan,Pay TV,Streaming video...
4040 2125 14 Ohio Male Multiracial Less than $29,999 Yes Student 14-19 Midwest ... 1 0 0 0 0 0 1 0 1 Home Internet,Landline telephone,Pay TV,Stream...
4041 2126 14 Oregon Male White or Caucasian (Non-Hispanic) $100,000 to $299,999 Yes Unemployed 14-19 West ... 0 1 0 0 0 0 0 0 0 Home Internet,Magazine(print or digital)
4042 2127 70 Massachusetts Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Retired 70 or older Northeast ... 1 0 0 0 0 0 0 0 0 Home Internet,Landline telephone
4043 2128 18 Alabama Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Unemployed 14-19 South ... 0 0 0 0 0 0 1 0 0 Gaming,Home Internet,Pay TV
4044 2129 79 Illinois Female White or Caucasian (Non-Hispanic) Less than $29,999 No Retired 70 or older Midwest ... 0 1 0 0 1 0 1 0 0 Home Internet,Magazine(print or digital),News/...
4045 2130 77 Pennsylvania Female Multiracial $30,000 to $49,999 No Retired 70 or older Northeast ... 1 0 0 1 0 0 0 0 0 Home Internet,Landline telephone,Mobile voice

4046 rows × 25 columns

In [22]:
#introduce a column for future products to capture all the fucture products that the comsumers wish to own
df_Q_10['future products'] = 0

for index , row in df_Q_8.iterrows():  
    list_futureproducts = []
    if(df_Q_10.iloc[index,12] == 1):
        list_futureproducts.append("Basic mobile phone")
    if(df_Q_10.iloc[index,13] == 1):
        list_futureproducts.append('Blu-ray disc player/DVD player')    
    if(df_Q_10.iloc[index,14] == 1):
        list_futureproducts.append('Computer network/router in your home for wireless computer/laptop usage')  
    if(df_Q_10.iloc[index,15] == 1):
        list_futureproducts.append("Dedicated e-book reader")
    if(df_Q_10.iloc[index,16] == 1):
        list_futureproducts.append("Desktop computer") 
    if(df_Q_10.iloc[index,17] == 1):
        list_futureproducts.append("Digital video recorder")  
    if(df_Q_10.iloc[index,19] == 1):
        list_futureproducts.append("Drone")  
    if(df_Q_10.iloc[index,20] == 1):
        list_futureproducts.append("Fitness band")   
    if(df_Q_10.iloc[index,21] == 1):
        list_futureproducts.append("Flat panel television")   
    if(df_Q_10.iloc[index,22] == 1):
        list_futureproducts.append("Gaming console")   
    if(df_Q_10.iloc[index,23] == 1):
        list_futureproducts.append("Laptop computer")   
    if(df_Q_10.iloc[index,25] == 1):
        list_futureproducts.append("Over-the-air digital TV antenna ")   
    if(df_Q_10.iloc[index,26] == 1):
        list_futureproducts.append("Portable streaming thumb drive/fob")   
    if(df_Q_10.iloc[index,27] == 1):
        list_futureproducts.append("Portable video game player")  
    if(df_Q_10.iloc[index,28] == 1):
        list_futureproducts.append("Smart watch")  
    if(df_Q_10.iloc[index,29] == 1):
        list_futureproducts.append("Smartphone")  
    if(df_Q_10.iloc[index,30] == 1):
        list_futureproducts.append("Streaming media box or over-the-top box")  
    if(df_Q_10.iloc[index,31] == 1):
        list_futureproducts.append("Tablet")
    if(df_Q_10.iloc[index,32] == 1):
        list_futureproducts.append("Virtual reality headset") 
#     print(list_products)    
    listToStrg = ','.join([str(elem) for elem in list_futureproducts])
                    
    df_Q_10.iloc[index,33] = listToStrg
    

df_Q_10 
Out[22]:
index Age State Gender Ethnicity Household Income Children in Household? Employment status Age range Region ... Q10 - Of those products you indicated you do not currently own, which of the following do you plan to purchase in the next 12 months?-None of the above Q10 - Of those products you indicated you do not currently own, which of the following do you plan to purchase in the next 12 months?-Over-the-air digital TV antenna (for free access to network broadcast without pay TV subscription) Q10 - Of those products you indicated you do not currently own, which of the following do you plan to purchase in the next 12 months?-Portable streaming thumb drive/fob Q10 - Of those products you indicated you do not currently own, which of the following do you plan to purchase in the next 12 months?-Portable video game player Q10 - Of those products you indicated you do not currently own, which of the following do you plan to purchase in the next 12 months?-Smart watch Q10 - Of those products you indicated you do not currently own, which of the following do you plan to purchase in the next 12 months?-Smartphone Q10 - Of those products you indicated you do not currently own, which of the following do you plan to purchase in the next 12 months?-Streaming media box or over-the-top box Q10 - Of those products you indicated you do not currently own, which of the following do you plan to purchase in the next 12 months?-Tablet Q10 - Of those products you indicated you do not currently own, which of the following do you plan to purchase in the next 12 months?-Virtual reality headset future products
0 0 29 New York Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Employed full-time or part-time 24-29 Northeast ... 0 0 0 0 0 0 0 0 0 Drone,Flat panel television
1 1 29 California Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Employed full-time or part-time 24-29 West ... 0 1 0 0 0 0 0 0 0 Computer network/router in your home for wirel...
2 2 29 Colorado Female White or Caucasian (Non-Hispanic) Less than $29,999 No Employed full-time or part-time 24-29 West ... 1 0 0 0 0 0 0 0 0
3 3 29 South Carolina Female White or Caucasian (Non-Hispanic) $30,000 to $49,999 Yes Student 24-29 South ... 0 0 0 1 0 0 0 0 0 Portable video game player
4 4 29 Virginia Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Employed full-time or part-time 24-29 South ... 0 0 0 0 0 0 0 0 0 Drone,Fitness band
5 5 29 Pennsylvania Male Multiracial $50,000 to $99,999 No Unemployed 24-29 Northeast ... 0 0 0 1 0 0 0 1 0 Computer network/router in your home for wirel...
6 6 24 Michigan Female White or Caucasian (Non-Hispanic) Less than $29,999 No Employed full-time or part-time 24-29 Midwest ... 1 0 0 0 0 0 0 0 0
7 7 29 New Jersey Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Employed full-time or part-time 24-29 Northeast ... 0 0 0 0 0 0 1 1 0 Streaming media box or over-the-top box,Tablet
8 9 29 Florida Male Multiracial Less than $29,999 No Student 24-29 South ... 0 0 0 0 0 0 0 0 0 Digital video recorder,Fitness band
9 10 29 New York Female African American $100,000 to $299,999 Yes Employed full-time or part-time 24-29 Northeast ... 0 1 0 0 1 1 1 0 0 Computer network/router in your home for wirel...
10 11 29 California Female East Asian or Southeast Asian $50,000 to $99,999 Yes Unemployed 24-29 West ... 0 0 0 0 0 0 0 1 0 Tablet
11 12 29 New Jersey Female Hispanic $50,000 to $99,999 No Employed full-time or part-time 24-29 Northeast ... 1 0 0 0 0 0 0 0 0
12 13 29 Maine Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Employed full-time or part-time 24-29 Northeast ... 0 0 0 0 1 0 0 0 0 Digital video recorder,Fitness band,Smart watch
13 14 29 Washington Male East Asian or Southeast Asian $50,000 to $99,999 No Unemployed 24-29 West ... 0 0 0 0 0 0 0 0 0 Computer network/router in your home for wirel...
14 15 29 Massachusetts Male East Asian or Southeast Asian $100,000 to $299,999 No Employed full-time or part-time 24-29 Northeast ... 0 0 0 0 0 0 0 0 1 Dedicated e-book reader,Virtual reality headset
15 16 20 Wisconsin Male East Asian or Southeast Asian Less than $29,999 No Student 14-23 Midwest ... 1 0 0 0 0 0 0 0 0
16 17 29 Maine Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Employed full-time or part-time 24-29 Northeast ... 0 0 0 0 0 0 1 0 0 Streaming media box or over-the-top box
17 18 26 Florida Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Self-employed 24-29 South ... 0 0 1 0 0 0 1 0 1 Dedicated e-book reader,Portable streaming thu...
18 19 29 Tennessee Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Employed full-time or part-time 24-29 South ... 1 0 0 0 0 0 0 0 0
19 20 29 Tennessee Male White or Caucasian (Non-Hispanic) Less than $29,999 Yes Student 24-29 South ... 0 0 0 0 0 1 0 0 0 Smartphone
20 22 29 Wisconsin Female White or Caucasian (Non-Hispanic) $30,000 to $49,999 Yes Unemployed 24-29 Midwest ... 0 0 0 0 0 0 0 0 0 Drone
21 23 29 North Carolina Male White or Caucasian (Non-Hispanic) $30,000 to $49,999 No Employed full-time or part-time 24-29 South ... 0 0 0 0 1 0 1 0 0 Fitness band,Smart watch,Streaming media box o...
22 24 17 California Male Hispanic $50,000 to $99,999 Yes Student 14-23 West ... 0 0 0 0 0 0 1 0 0 Digital video recorder,Streaming media box or ...
23 25 29 Missouri Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Employed full-time or part-time 24-29 Midwest ... 0 0 0 0 0 0 0 0 1 Dedicated e-book reader,Fitness band,Virtual r...
24 26 29 Ohio Male White or Caucasian (Non-Hispanic) Less than $29,999 No Employed full-time or part-time 24-29 Midwest ... 0 0 0 0 0 0 0 1 0 Desktop computer,Tablet
25 27 29 New Jersey Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Employed full-time or part-time 24-29 Northeast ... 0 0 0 0 0 0 0 0 0 Blu-ray disc player/DVD player,Fitness band
26 28 29 Texas Female White or Caucasian (Non-Hispanic) $30,000 to $49,999 No Unemployed 24-29 South ... 1 0 0 0 0 0 0 0 0
27 29 24 Arizona Female White or Caucasian (Non-Hispanic) $30,000 to $49,999 No Student 24-29 West ... 0 0 0 0 0 0 0 0 0 Fitness band
28 30 29 California Female Hispanic Less than $29,999 Yes Unemployed 24-29 West ... 1 0 0 0 0 0 0 0 0
29 31 29 California Female East Asian or Southeast Asian $50,000 to $99,999 No Employed full-time or part-time 24-29 West ... 1 0 0 0 0 0 0 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
4016 2096 71 South Carolina Male Multiracial $30,000 to $49,999 No Retired 70 or older South ... 1 0 0 0 0 0 0 0 0
4017 2097 73 Missouri Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Retired 70 or older Midwest ... 1 0 0 0 0 0 0 0 0
4018 2099 73 Florida Male White or Caucasian (Non-Hispanic) Less than $29,999 No Self-employed 70 or older South ... 0 0 0 0 0 0 0 0 0 Dedicated e-book reader,Drone
4019 2100 18 Louisiana Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Student 14-19 South ... 0 0 0 0 0 0 0 0 0 Laptop computer
4020 2101 18 South Dakota Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Unemployed 14-19 Midwest ... 1 0 0 0 0 0 0 0 0
4021 2102 15 New York Male White or Caucasian (Non-Hispanic) $100,000 to $299,999 Yes Self-employed 14-19 Northeast ... 1 0 0 0 0 0 0 0 0
4022 2103 15 Florida Female White or Caucasian (Non-Hispanic) $100,000 to $299,999 Yes Employed full-time or part-time 14-19 South ... 0 0 0 0 1 0 0 1 0 Dedicated e-book reader,Smart watch,Tablet
4023 2105 71 Idaho Female White or Caucasian (Non-Hispanic) Less than $29,999 No Retired 70 or older West ... 1 0 0 0 0 0 0 0 0
4024 2106 76 Wisconsin Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Retired 70 or older Midwest ... 0 0 0 0 0 1 0 1 0 Smartphone,Tablet
4025 2107 18 California Female Hispanic $30,000 to $49,999 No Student 14-19 West ... 0 0 0 0 1 0 0 0 1 Basic mobile phone,Dedicated e-book reader,Dro...
4026 2109 73 Pennsylvania Female White or Caucasian (Non-Hispanic) $30,000 to $49,999 No Retired 70 or older Northeast ... 0 1 0 1 1 0 1 0 1 Basic mobile phone,Digital video recorder,Dron...
4027 2110 71 Virginia Male White or Caucasian (Non-Hispanic) Less than $29,999 No Retired 70 or older South ... 1 0 0 0 0 0 0 0 0
4028 2111 77 Michigan Female Multiracial $30,000 to $49,999 No Retired 70 or older Midwest ... 0 0 0 0 0 0 0 0 0 Blu-ray disc player/DVD player
4029 2112 18 Texas Male White or Caucasian (Non-Hispanic) $30,000 to $49,999 Yes Student 14-19 South ... 0 0 0 0 0 0 0 0 0 Digital video recorder
4030 2113 71 Alabama Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Retired 70 or older South ... 0 0 0 0 0 0 1 0 0 Streaming media box or over-the-top box
4031 2115 14 Kansas Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Self-employed 14-19 Midwest ... 0 1 1 1 0 0 0 0 0 Dedicated e-book reader,Over-the-air digital T...
4032 2116 70 Florida Female White or Caucasian (Non-Hispanic) Less than $29,999 No Retired 70 or older South ... 1 0 0 0 0 0 0 0 0
4033 2117 19 Maryland Female White or Caucasian (Non-Hispanic) $100,000 to $299,999 No Student 14-19 South ... 0 0 1 0 0 0 0 0 0 Blu-ray disc player/DVD player,Drone,Portable ...
4034 2118 15 Florida Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Student 14-19 South ... 0 0 0 1 1 1 0 0 0 Basic mobile phone,Dedicated e-book reader,Des...
4035 2119 14 Illinois Male White or Caucasian (Non-Hispanic) $100,000 to $299,999 Yes Student 14-19 Midwest ... 0 0 0 0 1 1 0 0 0 Desktop computer,Smart watch,Smartphone
4036 2120 70 Washington Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Retired 70 or older West ... 1 0 0 0 0 0 0 0 0
4037 2121 19 Virginia Female White or Caucasian (Non-Hispanic) Less than $29,999 Yes Unemployed 14-19 South ... 0 0 0 0 1 0 0 0 0 Smart watch
4038 2122 18 Florida Male White or Caucasian (Non-Hispanic) $100,000 to $299,999 No Student 14-19 South ... 1 0 0 0 0 0 0 0 0
4039 2124 15 Oregon Female White or Caucasian (Non-Hispanic) $100,000 to $299,999 Yes Student 14-19 West ... 0 0 0 0 0 0 0 0 0 Drone
4040 2125 14 Ohio Male Multiracial Less than $29,999 Yes Student 14-19 Midwest ... 0 0 0 0 0 0 0 0 0 Blu-ray disc player/DVD player,Drone,Gaming co...
4041 2126 14 Oregon Male White or Caucasian (Non-Hispanic) $100,000 to $299,999 Yes Unemployed 14-19 West ... 0 1 1 1 1 1 0 0 0 Dedicated e-book reader,Desktop computer,Over-...
4042 2127 70 Massachusetts Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Retired 70 or older Northeast ... 1 0 0 0 0 0 0 0 0
4043 2128 18 Alabama Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Unemployed 14-19 South ... 0 0 0 0 0 0 0 0 1 Laptop computer,Virtual reality headset
4044 2129 79 Illinois Female White or Caucasian (Non-Hispanic) Less than $29,999 No Retired 70 or older Midwest ... 1 0 0 0 0 0 0 0 0
4045 2130 77 Pennsylvania Female Multiracial $30,000 to $49,999 No Retired 70 or older Northeast ... 1 0 0 0 0 0 0 0 0

4046 rows × 34 columns

In [23]:
#Making a new dataframe with only features and products

df1 = df_Q_8.iloc[:,0:12]
df2 = df_Q_8.iloc[:,-1].to_frame()


df_new_8=pd.concat([df1 , df2 ], axis=1)
df_new_8
Out[23]:
index Age State Gender Ethnicity Household Income Children in Household? Employment status Age range Region year quantity products
0 0 29 New York Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Employed full-time or part-time 24-29 Northeast 2010 1 Computer network/router in your home for wirel...
1 1 29 California Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Employed full-time or part-time 24-29 West 2010 1 Blu-ray disc player/DVD player,Desktop compute...
2 2 29 Colorado Female White or Caucasian (Non-Hispanic) Less than $29,999 No Employed full-time or part-time 24-29 West 2010 1 Blu-ray disc player/DVD player,Computer networ...
3 3 29 South Carolina Female White or Caucasian (Non-Hispanic) $30,000 to $49,999 Yes Student 24-29 South 2010 1 Computer network/router in your home for wirel...
4 4 29 Virginia Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Employed full-time or part-time 24-29 South 2010 1 Blu-ray disc player/DVD player,Desktop compute...
5 5 29 Pennsylvania Male Multiracial $50,000 to $99,999 No Unemployed 24-29 Northeast 2010 1 Basic mobile phone,Blu-ray disc player/DVD pla...
6 6 24 Michigan Female White or Caucasian (Non-Hispanic) Less than $29,999 No Employed full-time or part-time 24-29 Midwest 2010 1 Computer network/router in your home for wirel...
7 7 29 New Jersey Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Employed full-time or part-time 24-29 Northeast 2010 1 Blu-ray disc player/DVD player,Computer networ...
8 9 29 Florida Male Multiracial Less than $29,999 No Student 24-29 South 2010 1 Basic mobile phone,Computer network/router in ...
9 10 29 New York Female African American $100,000 to $299,999 Yes Employed full-time or part-time 24-29 Northeast 2010 1 Basic mobile phone,Blu-ray disc player/DVD pla...
10 11 29 California Female East Asian or Southeast Asian $50,000 to $99,999 Yes Unemployed 24-29 West 2010 1 Basic mobile phone,Blu-ray disc player/DVD pla...
11 12 29 New Jersey Female Hispanic $50,000 to $99,999 No Employed full-time or part-time 24-29 Northeast 2010 1 Basic mobile phone,Blu-ray disc player/DVD pla...
12 13 29 Maine Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Employed full-time or part-time 24-29 Northeast 2010 1 Blu-ray disc player/DVD player,Flat panel tele...
13 14 29 Washington Male East Asian or Southeast Asian $50,000 to $99,999 No Unemployed 24-29 West 2010 1 Gaming console,Laptop computer,Smartphone
14 15 29 Massachusetts Male East Asian or Southeast Asian $100,000 to $299,999 No Employed full-time or part-time 24-29 Northeast 2010 1 Desktop computer,Fitness band,Gaming console,L...
15 16 20 Wisconsin Male East Asian or Southeast Asian Less than $29,999 No Student 14-23 Midwest 2010 1 Computer network/router in your home for wirel...
16 17 29 Maine Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Employed full-time or part-time 24-29 Northeast 2010 1 Blu-ray disc player/DVD player,Computer networ...
17 18 26 Florida Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Self-employed 24-29 South 2010 1 Blu-ray disc player/DVD player,Desktop compute...
18 19 29 Tennessee Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Employed full-time or part-time 24-29 South 2010 1 Computer network/router in your home for wirel...
19 20 29 Tennessee Male White or Caucasian (Non-Hispanic) Less than $29,999 Yes Student 24-29 South 2010 1 Basic mobile phone,Blu-ray disc player/DVD pla...
20 22 29 Wisconsin Female White or Caucasian (Non-Hispanic) $30,000 to $49,999 Yes Unemployed 24-29 Midwest 2010 1 Blu-ray disc player/DVD player,Computer networ...
21 23 29 North Carolina Male White or Caucasian (Non-Hispanic) $30,000 to $49,999 No Employed full-time or part-time 24-29 South 2010 1 Blu-ray disc player/DVD player,Computer networ...
22 24 17 California Male Hispanic $50,000 to $99,999 Yes Student 14-23 West 2010 1 Blu-ray disc player/DVD player,Computer networ...
23 25 29 Missouri Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Employed full-time or part-time 24-29 Midwest 2010 1 Blu-ray disc player/DVD player,Desktop compute...
24 26 29 Ohio Male White or Caucasian (Non-Hispanic) Less than $29,999 No Employed full-time or part-time 24-29 Midwest 2010 1 Digital video recorder,Flat panel television,G...
25 27 29 New Jersey Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Employed full-time or part-time 24-29 Northeast 2010 1 Computer network/router in your home for wirel...
26 28 29 Texas Female White or Caucasian (Non-Hispanic) $30,000 to $49,999 No Unemployed 24-29 South 2010 1 Gaming console,Laptop computer,Smartphone
27 29 24 Arizona Female White or Caucasian (Non-Hispanic) $30,000 to $49,999 No Student 24-29 West 2010 1 Blu-ray disc player/DVD player,Computer networ...
28 30 29 California Female Hispanic Less than $29,999 Yes Unemployed 24-29 West 2010 1 Blu-ray disc player/DVD player,Fitness band,Fl...
29 31 29 California Female East Asian or Southeast Asian $50,000 to $99,999 No Employed full-time or part-time 24-29 West 2010 1 Computer network/router in your home for wirel...
... ... ... ... ... ... ... ... ... ... ... ... ... ...
4016 2096 71 South Carolina Male Multiracial $30,000 to $49,999 No Retired 70 or older South 2011 1 Blu-ray disc player/DVD player,Desktop compute...
4017 2097 73 Missouri Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Retired 70 or older Midwest 2011 1 Computer network/router in your home for wirel...
4018 2099 73 Florida Male White or Caucasian (Non-Hispanic) Less than $29,999 No Self-employed 70 or older South 2011 1 Blu-ray disc player/DVD player,Flat panel tele...
4019 2100 18 Louisiana Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Student 14-19 South 2011 1 Blu-ray disc player/DVD player,Computer networ...
4020 2101 18 South Dakota Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Unemployed 14-19 Midwest 2011 1 Gaming console,Laptop computer
4021 2102 15 New York Male White or Caucasian (Non-Hispanic) $100,000 to $299,999 Yes Self-employed 14-19 Northeast 2011 1 Blu-ray disc player/DVD player,Computer networ...
4022 2103 15 Florida Female White or Caucasian (Non-Hispanic) $100,000 to $299,999 Yes Employed full-time or part-time 14-19 South 2011 1 Blu-ray disc player/DVD player,Computer networ...
4023 2105 71 Idaho Female White or Caucasian (Non-Hispanic) Less than $29,999 No Retired 70 or older West 2011 1 Basic mobile phone,Blu-ray disc player/DVD pla...
4024 2106 76 Wisconsin Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Retired 70 or older Midwest 2011 1 Basic mobile phone,Blu-ray disc player/DVD pla...
4025 2107 18 California Female Hispanic $30,000 to $49,999 No Student 14-19 West 2011 1 Blu-ray disc player/DVD player,Desktop compute...
4026 2109 73 Pennsylvania Female White or Caucasian (Non-Hispanic) $30,000 to $49,999 No Retired 70 or older Northeast 2011 1 Blu-ray disc player/DVD player,Computer networ...
4027 2110 71 Virginia Male White or Caucasian (Non-Hispanic) Less than $29,999 No Retired 70 or older South 2011 1 Basic mobile phone,Blu-ray disc player/DVD pla...
4028 2111 77 Michigan Female Multiracial $30,000 to $49,999 No Retired 70 or older Midwest 2011 1 Basic mobile phone,Computer network/router in ...
4029 2112 18 Texas Male White or Caucasian (Non-Hispanic) $30,000 to $49,999 Yes Student 14-19 South 2011 1 Blu-ray disc player/DVD player,Computer networ...
4030 2113 71 Alabama Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Retired 70 or older South 2011 1 Blu-ray disc player/DVD player,Computer networ...
4031 2115 14 Kansas Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Self-employed 14-19 Midwest 2011 1 Blu-ray disc player/DVD player,Computer networ...
4032 2116 70 Florida Female White or Caucasian (Non-Hispanic) Less than $29,999 No Retired 70 or older South 2011 1 Basic mobile phone,Computer network/router in ...
4033 2117 19 Maryland Female White or Caucasian (Non-Hispanic) $100,000 to $299,999 No Student 14-19 South 2011 1 Computer network/router in your home for wirel...
4034 2118 15 Florida Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Student 14-19 South 2011 1 Blu-ray disc player/DVD player,Digital video r...
4035 2119 14 Illinois Male White or Caucasian (Non-Hispanic) $100,000 to $299,999 Yes Student 14-19 Midwest 2011 1 Dedicated e-book reader,Digital video recorder...
4036 2120 70 Washington Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Retired 70 or older West 2011 1 Blu-ray disc player/DVD player,Computer networ...
4037 2121 19 Virginia Female White or Caucasian (Non-Hispanic) Less than $29,999 Yes Unemployed 14-19 South 2011 1 Blu-ray disc player/DVD player,Dedicated e-boo...
4038 2122 18 Florida Male White or Caucasian (Non-Hispanic) $100,000 to $299,999 No Student 14-19 South 2011 1 Computer network/router in your home for wirel...
4039 2124 15 Oregon Female White or Caucasian (Non-Hispanic) $100,000 to $299,999 Yes Student 14-19 West 2011 1 Computer network/router in your home for wirel...
4040 2125 14 Ohio Male Multiracial Less than $29,999 Yes Student 14-19 Midwest 2011 1 Flat panel television,Smartphone,Tablet,Virtua...
4041 2126 14 Oregon Male White or Caucasian (Non-Hispanic) $100,000 to $299,999 Yes Unemployed 14-19 West 2011 1 Blu-ray disc player/DVD player,Computer networ...
4042 2127 70 Massachusetts Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Retired 70 or older Northeast 2011 1 Blu-ray disc player/DVD player,Computer networ...
4043 2128 18 Alabama Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Unemployed 14-19 South 2011 1 Basic mobile phone,Computer network/router in ...
4044 2129 79 Illinois Female White or Caucasian (Non-Hispanic) Less than $29,999 No Retired 70 or older Midwest 2011 1 Basic mobile phone,Desktop computer,Flat panel...
4045 2130 77 Pennsylvania Female Multiracial $30,000 to $49,999 No Retired 70 or older Northeast 2011 1 Basic mobile phone,Blu-ray disc player/DVD pla...

4046 rows × 13 columns

In [24]:
#Making a new dataframe with only features and subscrption

df1 = df_Q_26.iloc[:,0:12]
df2 = df_Q_26.iloc[:,-1].to_frame()


df_new_26=pd.concat([df1 , df2 ], axis=1)
df_new_26
Out[24]:
index Age State Gender Ethnicity Household Income Children in Household? Employment status Age range Region year quantity subscriptions
0 0 29 New York Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Employed full-time or part-time 24-29 Northeast 2010 1 Magazine(print or digital),Pay TV
1 1 29 California Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Employed full-time or part-time 24-29 West 2010 1 Home Internet,Landline telephone,Mobile data p...
2 2 29 Colorado Female White or Caucasian (Non-Hispanic) Less than $29,999 No Employed full-time or part-time 24-29 West 2010 1 Mobile data plan,Mobile voice,Streaming music ...
3 3 29 South Carolina Female White or Caucasian (Non-Hispanic) $30,000 to $49,999 Yes Student 24-29 South 2010 1 Home Internet,Mobile data plan
4 4 29 Virginia Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Employed full-time or part-time 24-29 South 2010 1 Gaming
5 5 29 Pennsylvania Male Multiracial $50,000 to $99,999 No Unemployed 24-29 Northeast 2010 1 Gaming,Home Internet,Magazine(print or digital...
6 6 24 Michigan Female White or Caucasian (Non-Hispanic) Less than $29,999 No Employed full-time or part-time 24-29 Midwest 2010 1 Gaming,Home Internet,Pay TV,Streaming video se...
7 7 29 New Jersey Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Employed full-time or part-time 24-29 Northeast 2010 1 Home Internet,Landline telephone,Magazine(prin...
8 9 29 Florida Male Multiracial Less than $29,999 No Student 24-29 South 2010 1 Home Internet,Mobile data plan,Pay TV,Streamin...
9 10 29 New York Female African American $100,000 to $299,999 Yes Employed full-time or part-time 24-29 Northeast 2010 1 Magazine(print or digital),Mobile data plan,Mo...
10 11 29 California Female East Asian or Southeast Asian $50,000 to $99,999 Yes Unemployed 24-29 West 2010 1 Home Internet,Mobile data plan
11 12 29 New Jersey Female Hispanic $50,000 to $99,999 No Employed full-time or part-time 24-29 Northeast 2010 1 Gaming,Home Internet,Landline telephone,Magazi...
12 13 29 Maine Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Employed full-time or part-time 24-29 Northeast 2010 1 Home Internet,News/Newspaper,Pay TV,Streaming ...
13 14 29 Washington Male East Asian or Southeast Asian $50,000 to $99,999 No Unemployed 24-29 West 2010 1 Home Internet,Landline telephone,Mobile data p...
14 15 29 Massachusetts Male East Asian or Southeast Asian $100,000 to $299,999 No Employed full-time or part-time 24-29 Northeast 2010 1 Pay TV
15 16 20 Wisconsin Male East Asian or Southeast Asian Less than $29,999 No Student 14-23 Midwest 2010 1 Home Internet,Mobile data plan,Pay TV,Streamin...
16 17 29 Maine Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Employed full-time or part-time 24-29 Northeast 2010 1 Gaming,Home Internet,Landline telephone,Mobile...
17 18 26 Florida Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Self-employed 24-29 South 2010 1 Gaming,Home Internet,Landline telephone,Mobile...
18 19 29 Tennessee Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Employed full-time or part-time 24-29 South 2010 1
19 20 29 Tennessee Male White or Caucasian (Non-Hispanic) Less than $29,999 Yes Student 24-29 South 2010 1 Gaming,Home Internet,Mobile data plan,Pay TV,S...
20 22 29 Wisconsin Female White or Caucasian (Non-Hispanic) $30,000 to $49,999 Yes Unemployed 24-29 Midwest 2010 1 Gaming,Home Internet,Magazine(print or digital...
21 23 29 North Carolina Male White or Caucasian (Non-Hispanic) $30,000 to $49,999 No Employed full-time or part-time 24-29 South 2010 1 Gaming,Home Internet,Mobile data plan,Mobile v...
22 24 17 California Male Hispanic $50,000 to $99,999 Yes Student 14-23 West 2010 1 Home Internet,Landline telephone,Mobile data p...
23 25 29 Missouri Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Employed full-time or part-time 24-29 Midwest 2010 1 Home Internet,Magazine(print or digital),Mobil...
24 26 29 Ohio Male White or Caucasian (Non-Hispanic) Less than $29,999 No Employed full-time or part-time 24-29 Midwest 2010 1 Gaming,Home Internet,Magazine(print or digital...
25 27 29 New Jersey Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Employed full-time or part-time 24-29 Northeast 2010 1 Home Internet,Landline telephone,Mobile data p...
26 28 29 Texas Female White or Caucasian (Non-Hispanic) $30,000 to $49,999 No Unemployed 24-29 South 2010 1
27 29 24 Arizona Female White or Caucasian (Non-Hispanic) $30,000 to $49,999 No Student 24-29 West 2010 1 Home Internet,Streaming video service
28 30 29 California Female Hispanic Less than $29,999 Yes Unemployed 24-29 West 2010 1 Home Internet,Mobile data plan,Mobile voice,St...
29 31 29 California Female East Asian or Southeast Asian $50,000 to $99,999 No Employed full-time or part-time 24-29 West 2010 1 Mobile data plan
... ... ... ... ... ... ... ... ... ... ... ... ... ...
4016 2096 71 South Carolina Male Multiracial $30,000 to $49,999 No Retired 70 or older South 2011 1 Home Internet,Landline telephone,Pay TV
4017 2097 73 Missouri Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Retired 70 or older Midwest 2011 1 Home Internet,Landline telephone,Magazine(prin...
4018 2099 73 Florida Male White or Caucasian (Non-Hispanic) Less than $29,999 No Self-employed 70 or older South 2011 1 Gaming,Home Internet,Mobile data plan,Mobile v...
4019 2100 18 Louisiana Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Student 14-19 South 2011 1 Gaming,Home Internet,Mobile data plan,Mobile v...
4020 2101 18 South Dakota Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Unemployed 14-19 Midwest 2011 1 Magazine(print or digital),Mobile data plan
4021 2102 15 New York Male White or Caucasian (Non-Hispanic) $100,000 to $299,999 Yes Self-employed 14-19 Northeast 2011 1 Gaming,Home Internet,Landline telephone,Magazi...
4022 2103 15 Florida Female White or Caucasian (Non-Hispanic) $100,000 to $299,999 Yes Employed full-time or part-time 14-19 South 2011 1 Magazine(print or digital),Mobile data plan,Mo...
4023 2105 71 Idaho Female White or Caucasian (Non-Hispanic) Less than $29,999 No Retired 70 or older West 2011 1 Landline telephone
4024 2106 76 Wisconsin Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Retired 70 or older Midwest 2011 1 Home Internet,Landline telephone,Magazine(prin...
4025 2107 18 California Female Hispanic $30,000 to $49,999 No Student 14-19 West 2011 1
4026 2109 73 Pennsylvania Female White or Caucasian (Non-Hispanic) $30,000 to $49,999 No Retired 70 or older Northeast 2011 1 Landline telephone,Pay TV,Streaming video service
4027 2110 71 Virginia Male White or Caucasian (Non-Hispanic) Less than $29,999 No Retired 70 or older South 2011 1 Pay TV
4028 2111 77 Michigan Female Multiracial $30,000 to $49,999 No Retired 70 or older Midwest 2011 1 Home Internet,Landline telephone,Pay TV
4029 2112 18 Texas Male White or Caucasian (Non-Hispanic) $30,000 to $49,999 Yes Student 14-19 South 2011 1 Home Internet,Mobile voice,Streaming video ser...
4030 2113 71 Alabama Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Retired 70 or older South 2011 1 Home Internet,Landline telephone,Magazine(prin...
4031 2115 14 Kansas Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Self-employed 14-19 Midwest 2011 1 Gaming,Home Internet,Landline telephone,Magazi...
4032 2116 70 Florida Female White or Caucasian (Non-Hispanic) Less than $29,999 No Retired 70 or older South 2011 1 Home Internet,Landline telephone
4033 2117 19 Maryland Female White or Caucasian (Non-Hispanic) $100,000 to $299,999 No Student 14-19 South 2011 1 Home Internet,Landline telephone,Magazine(prin...
4034 2118 15 Florida Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Student 14-19 South 2011 1 Home Internet,Mobile data plan,Mobile voice,Ne...
4035 2119 14 Illinois Male White or Caucasian (Non-Hispanic) $100,000 to $299,999 Yes Student 14-19 Midwest 2011 1 Home Internet,Landline telephone,Mobile data p...
4036 2120 70 Washington Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Retired 70 or older West 2011 1 Home Internet,Landline telephone,Magazine(prin...
4037 2121 19 Virginia Female White or Caucasian (Non-Hispanic) Less than $29,999 Yes Unemployed 14-19 South 2011 1 Home Internet,Landline telephone,Mobile data p...
4038 2122 18 Florida Male White or Caucasian (Non-Hispanic) $100,000 to $299,999 No Student 14-19 South 2011 1 Gaming,Home Internet,Landline telephone,Magazi...
4039 2124 15 Oregon Female White or Caucasian (Non-Hispanic) $100,000 to $299,999 Yes Student 14-19 West 2011 1 Gaming,Mobile data plan,Pay TV,Streaming video...
4040 2125 14 Ohio Male Multiracial Less than $29,999 Yes Student 14-19 Midwest 2011 1 Home Internet,Landline telephone,Pay TV,Stream...
4041 2126 14 Oregon Male White or Caucasian (Non-Hispanic) $100,000 to $299,999 Yes Unemployed 14-19 West 2011 1 Home Internet,Magazine(print or digital)
4042 2127 70 Massachusetts Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Retired 70 or older Northeast 2011 1 Home Internet,Landline telephone
4043 2128 18 Alabama Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Unemployed 14-19 South 2011 1 Gaming,Home Internet,Pay TV
4044 2129 79 Illinois Female White or Caucasian (Non-Hispanic) Less than $29,999 No Retired 70 or older Midwest 2011 1 Home Internet,Magazine(print or digital),News/...
4045 2130 77 Pennsylvania Female Multiracial $30,000 to $49,999 No Retired 70 or older Northeast 2011 1 Home Internet,Landline telephone,Mobile voice

4046 rows × 13 columns

In [25]:
#Making a new dataframe with only features and future products

df1 = df_Q_10.iloc[:,0:12]
df2 = df_Q_10.iloc[:,-1].to_frame()


df_new_10=pd.concat([df1 , df2 ], axis=1)
df_new_10
Out[25]:
index Age State Gender Ethnicity Household Income Children in Household? Employment status Age range Region year quantity future products
0 0 29 New York Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Employed full-time or part-time 24-29 Northeast 2010 1 Drone,Flat panel television
1 1 29 California Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Employed full-time or part-time 24-29 West 2010 1 Computer network/router in your home for wirel...
2 2 29 Colorado Female White or Caucasian (Non-Hispanic) Less than $29,999 No Employed full-time or part-time 24-29 West 2010 1
3 3 29 South Carolina Female White or Caucasian (Non-Hispanic) $30,000 to $49,999 Yes Student 24-29 South 2010 1 Portable video game player
4 4 29 Virginia Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Employed full-time or part-time 24-29 South 2010 1 Drone,Fitness band
5 5 29 Pennsylvania Male Multiracial $50,000 to $99,999 No Unemployed 24-29 Northeast 2010 1 Computer network/router in your home for wirel...
6 6 24 Michigan Female White or Caucasian (Non-Hispanic) Less than $29,999 No Employed full-time or part-time 24-29 Midwest 2010 1
7 7 29 New Jersey Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Employed full-time or part-time 24-29 Northeast 2010 1 Streaming media box or over-the-top box,Tablet
8 9 29 Florida Male Multiracial Less than $29,999 No Student 24-29 South 2010 1 Digital video recorder,Fitness band
9 10 29 New York Female African American $100,000 to $299,999 Yes Employed full-time or part-time 24-29 Northeast 2010 1 Computer network/router in your home for wirel...
10 11 29 California Female East Asian or Southeast Asian $50,000 to $99,999 Yes Unemployed 24-29 West 2010 1 Tablet
11 12 29 New Jersey Female Hispanic $50,000 to $99,999 No Employed full-time or part-time 24-29 Northeast 2010 1
12 13 29 Maine Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Employed full-time or part-time 24-29 Northeast 2010 1 Digital video recorder,Fitness band,Smart watch
13 14 29 Washington Male East Asian or Southeast Asian $50,000 to $99,999 No Unemployed 24-29 West 2010 1 Computer network/router in your home for wirel...
14 15 29 Massachusetts Male East Asian or Southeast Asian $100,000 to $299,999 No Employed full-time or part-time 24-29 Northeast 2010 1 Dedicated e-book reader,Virtual reality headset
15 16 20 Wisconsin Male East Asian or Southeast Asian Less than $29,999 No Student 14-23 Midwest 2010 1
16 17 29 Maine Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Employed full-time or part-time 24-29 Northeast 2010 1 Streaming media box or over-the-top box
17 18 26 Florida Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Self-employed 24-29 South 2010 1 Dedicated e-book reader,Portable streaming thu...
18 19 29 Tennessee Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Employed full-time or part-time 24-29 South 2010 1
19 20 29 Tennessee Male White or Caucasian (Non-Hispanic) Less than $29,999 Yes Student 24-29 South 2010 1 Smartphone
20 22 29 Wisconsin Female White or Caucasian (Non-Hispanic) $30,000 to $49,999 Yes Unemployed 24-29 Midwest 2010 1 Drone
21 23 29 North Carolina Male White or Caucasian (Non-Hispanic) $30,000 to $49,999 No Employed full-time or part-time 24-29 South 2010 1 Fitness band,Smart watch,Streaming media box o...
22 24 17 California Male Hispanic $50,000 to $99,999 Yes Student 14-23 West 2010 1 Digital video recorder,Streaming media box or ...
23 25 29 Missouri Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Employed full-time or part-time 24-29 Midwest 2010 1 Dedicated e-book reader,Fitness band,Virtual r...
24 26 29 Ohio Male White or Caucasian (Non-Hispanic) Less than $29,999 No Employed full-time or part-time 24-29 Midwest 2010 1 Desktop computer,Tablet
25 27 29 New Jersey Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Employed full-time or part-time 24-29 Northeast 2010 1 Blu-ray disc player/DVD player,Fitness band
26 28 29 Texas Female White or Caucasian (Non-Hispanic) $30,000 to $49,999 No Unemployed 24-29 South 2010 1
27 29 24 Arizona Female White or Caucasian (Non-Hispanic) $30,000 to $49,999 No Student 24-29 West 2010 1 Fitness band
28 30 29 California Female Hispanic Less than $29,999 Yes Unemployed 24-29 West 2010 1
29 31 29 California Female East Asian or Southeast Asian $50,000 to $99,999 No Employed full-time or part-time 24-29 West 2010 1
... ... ... ... ... ... ... ... ... ... ... ... ... ...
4016 2096 71 South Carolina Male Multiracial $30,000 to $49,999 No Retired 70 or older South 2011 1
4017 2097 73 Missouri Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Retired 70 or older Midwest 2011 1
4018 2099 73 Florida Male White or Caucasian (Non-Hispanic) Less than $29,999 No Self-employed 70 or older South 2011 1 Dedicated e-book reader,Drone
4019 2100 18 Louisiana Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Student 14-19 South 2011 1 Laptop computer
4020 2101 18 South Dakota Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Unemployed 14-19 Midwest 2011 1
4021 2102 15 New York Male White or Caucasian (Non-Hispanic) $100,000 to $299,999 Yes Self-employed 14-19 Northeast 2011 1
4022 2103 15 Florida Female White or Caucasian (Non-Hispanic) $100,000 to $299,999 Yes Employed full-time or part-time 14-19 South 2011 1 Dedicated e-book reader,Smart watch,Tablet
4023 2105 71 Idaho Female White or Caucasian (Non-Hispanic) Less than $29,999 No Retired 70 or older West 2011 1
4024 2106 76 Wisconsin Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Retired 70 or older Midwest 2011 1 Smartphone,Tablet
4025 2107 18 California Female Hispanic $30,000 to $49,999 No Student 14-19 West 2011 1 Basic mobile phone,Dedicated e-book reader,Dro...
4026 2109 73 Pennsylvania Female White or Caucasian (Non-Hispanic) $30,000 to $49,999 No Retired 70 or older Northeast 2011 1 Basic mobile phone,Digital video recorder,Dron...
4027 2110 71 Virginia Male White or Caucasian (Non-Hispanic) Less than $29,999 No Retired 70 or older South 2011 1
4028 2111 77 Michigan Female Multiracial $30,000 to $49,999 No Retired 70 or older Midwest 2011 1 Blu-ray disc player/DVD player
4029 2112 18 Texas Male White or Caucasian (Non-Hispanic) $30,000 to $49,999 Yes Student 14-19 South 2011 1 Digital video recorder
4030 2113 71 Alabama Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Retired 70 or older South 2011 1 Streaming media box or over-the-top box
4031 2115 14 Kansas Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Self-employed 14-19 Midwest 2011 1 Dedicated e-book reader,Over-the-air digital T...
4032 2116 70 Florida Female White or Caucasian (Non-Hispanic) Less than $29,999 No Retired 70 or older South 2011 1
4033 2117 19 Maryland Female White or Caucasian (Non-Hispanic) $100,000 to $299,999 No Student 14-19 South 2011 1 Blu-ray disc player/DVD player,Drone,Portable ...
4034 2118 15 Florida Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Student 14-19 South 2011 1 Basic mobile phone,Dedicated e-book reader,Des...
4035 2119 14 Illinois Male White or Caucasian (Non-Hispanic) $100,000 to $299,999 Yes Student 14-19 Midwest 2011 1 Desktop computer,Smart watch,Smartphone
4036 2120 70 Washington Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Retired 70 or older West 2011 1
4037 2121 19 Virginia Female White or Caucasian (Non-Hispanic) Less than $29,999 Yes Unemployed 14-19 South 2011 1 Smart watch
4038 2122 18 Florida Male White or Caucasian (Non-Hispanic) $100,000 to $299,999 No Student 14-19 South 2011 1
4039 2124 15 Oregon Female White or Caucasian (Non-Hispanic) $100,000 to $299,999 Yes Student 14-19 West 2011 1 Drone
4040 2125 14 Ohio Male Multiracial Less than $29,999 Yes Student 14-19 Midwest 2011 1 Blu-ray disc player/DVD player,Drone,Gaming co...
4041 2126 14 Oregon Male White or Caucasian (Non-Hispanic) $100,000 to $299,999 Yes Unemployed 14-19 West 2011 1 Dedicated e-book reader,Desktop computer,Over-...
4042 2127 70 Massachusetts Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Retired 70 or older Northeast 2011 1
4043 2128 18 Alabama Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Unemployed 14-19 South 2011 1 Laptop computer,Virtual reality headset
4044 2129 79 Illinois Female White or Caucasian (Non-Hispanic) Less than $29,999 No Retired 70 or older Midwest 2011 1
4045 2130 77 Pennsylvania Female Multiracial $30,000 to $49,999 No Retired 70 or older Northeast 2011 1

4046 rows × 13 columns

In [26]:
# merging the 3 dataframes of products , subscriptions and future subscriptions 
df_merged1 =  pd.merge(df_new_8, df_new_26, on=['index','Age', 'State' , 'Gender' ,'Ethnicity' , 'Household Income' , 'Children in Household?',
                                'Employment status' , 'Age range' , 'Region' , 'year' , 'quantity'])
df_merged2 = pd.merge(df_merged1, df_new_10, on=['index','Age', 'State' , 'Gender' ,'Ethnicity' , 'Household Income' , 'Children in Household?',
                                'Employment status' , 'Age range' , 'Region' , 'year' , 'quantity'])

#eliminating rows that consists of null values in either of these 3 columns 
df_merged2.replace("", float("NaN") , inplace = True)
df_merged2.dropna(subset = ["future products"] , inplace = True)
df_merged2.dropna(subset = ["products"] , inplace = True)
df_merged2.dropna(subset = ["subscriptions"] , inplace = True)
df_merged2
Out[26]:
index Age State Gender Ethnicity Household Income Children in Household? Employment status Age range Region year quantity products subscriptions future products
0 0 29 New York Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Employed full-time or part-time 24-29 Northeast 2010 1 Computer network/router in your home for wirel... Magazine(print or digital),Pay TV Drone,Flat panel television
1 1 29 California Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Employed full-time or part-time 24-29 West 2010 1 Blu-ray disc player/DVD player,Desktop compute... Home Internet,Landline telephone,Mobile data p... Computer network/router in your home for wirel...
3 3 29 South Carolina Female White or Caucasian (Non-Hispanic) $30,000 to $49,999 Yes Student 24-29 South 2010 1 Computer network/router in your home for wirel... Home Internet,Mobile data plan Portable video game player
4 4 29 Virginia Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Employed full-time or part-time 24-29 South 2010 1 Blu-ray disc player/DVD player,Desktop compute... Gaming Drone,Fitness band
5 5 29 Pennsylvania Male Multiracial $50,000 to $99,999 No Unemployed 24-29 Northeast 2010 1 Basic mobile phone,Blu-ray disc player/DVD pla... Gaming,Home Internet,Magazine(print or digital... Computer network/router in your home for wirel...
7 7 29 New Jersey Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Employed full-time or part-time 24-29 Northeast 2010 1 Blu-ray disc player/DVD player,Computer networ... Home Internet,Landline telephone,Magazine(prin... Streaming media box or over-the-top box,Tablet
8 9 29 Florida Male Multiracial Less than $29,999 No Student 24-29 South 2010 1 Basic mobile phone,Computer network/router in ... Home Internet,Mobile data plan,Pay TV,Streamin... Digital video recorder,Fitness band
9 10 29 New York Female African American $100,000 to $299,999 Yes Employed full-time or part-time 24-29 Northeast 2010 1 Basic mobile phone,Blu-ray disc player/DVD pla... Magazine(print or digital),Mobile data plan,Mo... Computer network/router in your home for wirel...
10 11 29 California Female East Asian or Southeast Asian $50,000 to $99,999 Yes Unemployed 24-29 West 2010 1 Basic mobile phone,Blu-ray disc player/DVD pla... Home Internet,Mobile data plan Tablet
12 13 29 Maine Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Employed full-time or part-time 24-29 Northeast 2010 1 Blu-ray disc player/DVD player,Flat panel tele... Home Internet,News/Newspaper,Pay TV,Streaming ... Digital video recorder,Fitness band,Smart watch
13 14 29 Washington Male East Asian or Southeast Asian $50,000 to $99,999 No Unemployed 24-29 West 2010 1 Gaming console,Laptop computer,Smartphone Home Internet,Landline telephone,Mobile data p... Computer network/router in your home for wirel...
14 15 29 Massachusetts Male East Asian or Southeast Asian $100,000 to $299,999 No Employed full-time or part-time 24-29 Northeast 2010 1 Desktop computer,Fitness band,Gaming console,L... Pay TV Dedicated e-book reader,Virtual reality headset
16 17 29 Maine Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Employed full-time or part-time 24-29 Northeast 2010 1 Blu-ray disc player/DVD player,Computer networ... Gaming,Home Internet,Landline telephone,Mobile... Streaming media box or over-the-top box
17 18 26 Florida Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Self-employed 24-29 South 2010 1 Blu-ray disc player/DVD player,Desktop compute... Gaming,Home Internet,Landline telephone,Mobile... Dedicated e-book reader,Portable streaming thu...
19 20 29 Tennessee Male White or Caucasian (Non-Hispanic) Less than $29,999 Yes Student 24-29 South 2010 1 Basic mobile phone,Blu-ray disc player/DVD pla... Gaming,Home Internet,Mobile data plan,Pay TV,S... Smartphone
20 22 29 Wisconsin Female White or Caucasian (Non-Hispanic) $30,000 to $49,999 Yes Unemployed 24-29 Midwest 2010 1 Blu-ray disc player/DVD player,Computer networ... Gaming,Home Internet,Magazine(print or digital... Drone
21 23 29 North Carolina Male White or Caucasian (Non-Hispanic) $30,000 to $49,999 No Employed full-time or part-time 24-29 South 2010 1 Blu-ray disc player/DVD player,Computer networ... Gaming,Home Internet,Mobile data plan,Mobile v... Fitness band,Smart watch,Streaming media box o...
22 24 17 California Male Hispanic $50,000 to $99,999 Yes Student 14-23 West 2010 1 Blu-ray disc player/DVD player,Computer networ... Home Internet,Landline telephone,Mobile data p... Digital video recorder,Streaming media box or ...
23 25 29 Missouri Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Employed full-time or part-time 24-29 Midwest 2010 1 Blu-ray disc player/DVD player,Desktop compute... Home Internet,Magazine(print or digital),Mobil... Dedicated e-book reader,Fitness band,Virtual r...
24 26 29 Ohio Male White or Caucasian (Non-Hispanic) Less than $29,999 No Employed full-time or part-time 24-29 Midwest 2010 1 Digital video recorder,Flat panel television,G... Gaming,Home Internet,Magazine(print or digital... Desktop computer,Tablet
25 27 29 New Jersey Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Employed full-time or part-time 24-29 Northeast 2010 1 Computer network/router in your home for wirel... Home Internet,Landline telephone,Mobile data p... Blu-ray disc player/DVD player,Fitness band
27 29 24 Arizona Female White or Caucasian (Non-Hispanic) $30,000 to $49,999 No Student 24-29 West 2010 1 Blu-ray disc player/DVD player,Computer networ... Home Internet,Streaming video service Fitness band
30 32 19 Texas Male African American Less than $29,999 No Student 14-23 South 2010 1 Blu-ray disc player/DVD player,Dedicated e-boo... Home Internet,News/Newspaper,Pay TV Computer network/router in your home for wirel...
31 33 29 Ohio Female White or Caucasian (Non-Hispanic) Less than $29,999 Yes Unemployed 24-29 Midwest 2010 1 Laptop computer Home Internet,Landline telephone,Pay TV Flat panel television,Gaming console,Tablet
32 34 29 Pennsylvania Female African American Less than $29,999 Yes Employed full-time or part-time 24-29 Northeast 2010 1 Dedicated e-book reader,Desktop computer,Gamin... Home Internet,Mobile voice,Pay TV,Streaming vi... Blu-ray disc player/DVD player,Flat panel tele...
33 35 29 Minnesota Female East Asian or Southeast Asian $50,000 to $99,999 Yes Unemployed 24-29 Midwest 2010 1 Basic mobile phone,Blu-ray disc player/DVD pla... Landline telephone,Streaming music service,Str... Smart watch,Virtual reality headset
34 36 29 Ohio Male White or Caucasian (Non-Hispanic) $30,000 to $49,999 Yes Employed full-time or part-time 24-29 Midwest 2010 1 Desktop computer,Digital video recorder,Flat p... Home Internet,Mobile data plan,Mobile voice,Pa... Streaming media box or over-the-top box
35 37 19 Nevada Female Multiracial Less than $29,999 No Employed full-time or part-time 14-23 West 2010 1 Computer network/router in your home for wirel... Gaming,Home Internet,Mobile data plan,Streamin... Laptop computer
36 38 29 California Female East Asian or Southeast Asian $50,000 to $99,999 Yes Employed full-time or part-time 24-29 West 2010 1 Blu-ray disc player/DVD player,Fitness band,Fl... Gaming,Home Internet,Landline telephone,Mobile... Dedicated e-book reader,Smart watch
38 40 29 Indiana Female African American Less than $29,999 No Student 24-29 Midwest 2010 1 Blu-ray disc player/DVD player,Computer networ... Home Internet,Landline telephone,Pay TV,Stream... Desktop computer
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
3985 2063 14 Virginia Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Employed full-time or part-time 14-19 South 2011 1 Blu-ray disc player/DVD player,Dedicated e-boo... Home Internet,Mobile voice,Streaming video ser... Over-the-air digital TV antenna
3986 2064 74 Missouri Male White or Caucasian (Non-Hispanic) $100,000 to $299,999 No Retired 70 or older Midwest 2011 1 Computer network/router in your home for wirel... Home Internet,Landline telephone,Magazine(prin... Tablet
3996 2075 14 Michigan Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Student 14-19 Midwest 2011 1 Blu-ray disc player/DVD player,Dedicated e-boo... Gaming,Home Internet,Landline telephone,Magazi... Fitness band,Virtual reality headset
3998 2077 71 Florida Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Self-employed 70 or older South 2011 1 Basic mobile phone,Blu-ray disc player/DVD pla... Home Internet,Landline telephone,Magazine(prin... Drone
3999 2078 70 Maryland Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Retired 70 or older South 2011 1 Computer network/router in your home for wirel... Home Internet,Landline telephone,Mobile data p... Portable streaming thumb drive/fob,Tablet
4005 2084 71 Illinois Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Retired 70 or older Midwest 2011 1 Basic mobile phone,Desktop computer,Flat panel... Home Internet,Landline telephone,Mobile voice,... Digital video recorder
4007 2087 19 Louisiana Male Pacific Islander (including Native Hawaiian, N... Less than $29,999 No Self-employed 14-19 South 2011 1 Basic mobile phone,Dedicated e-book reader,Fla... Mobile data plan,Streaming music service,Strea... Blu-ray disc player/DVD player,Desktop compute...
4008 2088 18 Pennsylvania Male East Asian or Southeast Asian Less than $29,999 No Employed full-time or part-time 14-19 Northeast 2011 1 Smartphone Gaming Smart watch
4011 2091 18 Texas Male Hispanic $50,000 to $99,999 No Student 14-19 South 2011 1 Gaming console,Laptop computer,Smartphone Gaming,Home Internet,Mobile data plan,Mobile v... Desktop computer,Virtual reality headset
4012 2092 73 California Male East Asian or Southeast Asian $100,000 to $299,999 Yes Self-employed 70 or older West 2011 1 Blu-ray disc player/DVD player,Computer networ... Home Internet,Landline telephone,Magazine(prin... Portable streaming thumb drive/fob
4013 2093 18 South Carolina Female White or Caucasian (Non-Hispanic) $30,000 to $49,999 No Student 14-19 South 2011 1 Basic mobile phone,Blu-ray disc player/DVD pla... Gaming,Home Internet,Landline telephone,Mobile... Virtual reality headset
4014 2094 14 Virginia Male White or Caucasian (Non-Hispanic) $30,000 to $49,999 Yes Student 14-19 South 2011 1 Blu-ray disc player/DVD player,Computer networ... Gaming,Home Internet,Mobile data plan,Mobile v... Drone,Streaming media box or over-the-top box,...
4015 2095 82 Washington Female White or Caucasian (Non-Hispanic) $100,000 to $299,999 No Retired 70 or older West 2011 1 Blu-ray disc player/DVD player,Computer networ... Home Internet,Landline telephone,Magazine(prin... Laptop computer
4018 2099 73 Florida Male White or Caucasian (Non-Hispanic) Less than $29,999 No Self-employed 70 or older South 2011 1 Blu-ray disc player/DVD player,Flat panel tele... Gaming,Home Internet,Mobile data plan,Mobile v... Dedicated e-book reader,Drone
4019 2100 18 Louisiana Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Student 14-19 South 2011 1 Blu-ray disc player/DVD player,Computer networ... Gaming,Home Internet,Mobile data plan,Mobile v... Laptop computer
4022 2103 15 Florida Female White or Caucasian (Non-Hispanic) $100,000 to $299,999 Yes Employed full-time or part-time 14-19 South 2011 1 Blu-ray disc player/DVD player,Computer networ... Magazine(print or digital),Mobile data plan,Mo... Dedicated e-book reader,Smart watch,Tablet
4024 2106 76 Wisconsin Female White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Retired 70 or older Midwest 2011 1 Basic mobile phone,Blu-ray disc player/DVD pla... Home Internet,Landline telephone,Magazine(prin... Smartphone,Tablet
4026 2109 73 Pennsylvania Female White or Caucasian (Non-Hispanic) $30,000 to $49,999 No Retired 70 or older Northeast 2011 1 Blu-ray disc player/DVD player,Computer networ... Landline telephone,Pay TV,Streaming video service Basic mobile phone,Digital video recorder,Dron...
4028 2111 77 Michigan Female Multiracial $30,000 to $49,999 No Retired 70 or older Midwest 2011 1 Basic mobile phone,Computer network/router in ... Home Internet,Landline telephone,Pay TV Blu-ray disc player/DVD player
4029 2112 18 Texas Male White or Caucasian (Non-Hispanic) $30,000 to $49,999 Yes Student 14-19 South 2011 1 Blu-ray disc player/DVD player,Computer networ... Home Internet,Mobile voice,Streaming video ser... Digital video recorder
4030 2113 71 Alabama Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Retired 70 or older South 2011 1 Blu-ray disc player/DVD player,Computer networ... Home Internet,Landline telephone,Magazine(prin... Streaming media box or over-the-top box
4031 2115 14 Kansas Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 Yes Self-employed 14-19 Midwest 2011 1 Blu-ray disc player/DVD player,Computer networ... Gaming,Home Internet,Landline telephone,Magazi... Dedicated e-book reader,Over-the-air digital T...
4033 2117 19 Maryland Female White or Caucasian (Non-Hispanic) $100,000 to $299,999 No Student 14-19 South 2011 1 Computer network/router in your home for wirel... Home Internet,Landline telephone,Magazine(prin... Blu-ray disc player/DVD player,Drone,Portable ...
4034 2118 15 Florida Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Student 14-19 South 2011 1 Blu-ray disc player/DVD player,Digital video r... Home Internet,Mobile data plan,Mobile voice,Ne... Basic mobile phone,Dedicated e-book reader,Des...
4035 2119 14 Illinois Male White or Caucasian (Non-Hispanic) $100,000 to $299,999 Yes Student 14-19 Midwest 2011 1 Dedicated e-book reader,Digital video recorder... Home Internet,Landline telephone,Mobile data p... Desktop computer,Smart watch,Smartphone
4037 2121 19 Virginia Female White or Caucasian (Non-Hispanic) Less than $29,999 Yes Unemployed 14-19 South 2011 1 Blu-ray disc player/DVD player,Dedicated e-boo... Home Internet,Landline telephone,Mobile data p... Smart watch
4039 2124 15 Oregon Female White or Caucasian (Non-Hispanic) $100,000 to $299,999 Yes Student 14-19 West 2011 1 Computer network/router in your home for wirel... Gaming,Mobile data plan,Pay TV,Streaming video... Drone
4040 2125 14 Ohio Male Multiracial Less than $29,999 Yes Student 14-19 Midwest 2011 1 Flat panel television,Smartphone,Tablet,Virtua... Home Internet,Landline telephone,Pay TV,Stream... Blu-ray disc player/DVD player,Drone,Gaming co...
4041 2126 14 Oregon Male White or Caucasian (Non-Hispanic) $100,000 to $299,999 Yes Unemployed 14-19 West 2011 1 Blu-ray disc player/DVD player,Computer networ... Home Internet,Magazine(print or digital) Dedicated e-book reader,Desktop computer,Over-...
4043 2128 18 Alabama Male White or Caucasian (Non-Hispanic) $50,000 to $99,999 No Unemployed 14-19 South 2011 1 Basic mobile phone,Computer network/router in ... Gaming,Home Internet,Pay TV Laptop computer,Virtual reality headset

2369 rows × 15 columns

Determining Top 5 Products Currently Owned

In [27]:
#top 5 products owned
num_columns = df_Q_8.shape[1]

list_sum = []
list_col_name = []

for i in range(12 , num_columns-2):
    list_sum.append(df_Q_8.iloc[:,i].sum())
    list_col_name.append(df_Q_8.columns[i])
    

a=pd.DataFrame(data=list_sum,index = list_col_name,columns=['frequency'])
a.sort_values(by='frequency',ascending=False,inplace=True)
temp_5 = a.head(5)
temp_5

#plotting top 5 products
col = ['Laptop computer' , 'Smartphone' , 'Flat panel television' , 'Desktop computer' , 'Tablet']
temp_5['products'] = col
temp_5 = temp_5.set_index('products')
temp_5.plot.bar()
Out[27]:
<matplotlib.axes._subplots.AxesSubplot at 0x16d0af27c88>

Determining Top 5 Subscriptions Currently Purchased

In [28]:
#top 5 subcriptions
num_columns = df_Q_26.shape[1]

list_sum = []
list_col_name = []

for i in range(12 , num_columns-2):
    list_sum.append(df_Q_26.iloc[:,i].sum())
    list_col_name.append(df_Q_26.columns[i])
    

b=pd.DataFrame(data=list_sum,index = list_col_name,columns=['frequency'])
b.sort_values(by='frequency',ascending=False,inplace=True)
temp_6 = b.head(5)

col = ['Home Internet' , 'Pay TV' , 'Mobile voice' , 'Mobile data plan' , 'Landline telephone']
temp_6['subscriptions'] = col
temp_6 = temp_6.set_index('subscriptions')
temp_6.plot.bar()
Out[28]:
<matplotlib.axes._subplots.AxesSubplot at 0x16d0acfccf8>

Laptop Computer + Home Internet

In [29]:
# taking combination of product ( Laptop computer ) and subsciprtion ( Home Internet ) 
# to determine the highest frequency of future product desired
df_laptop = df_merged2.loc[df_merged2['products'].str.contains("Laptop computer")]

df_laptop =df_laptop.loc[df_laptop['subscriptions'].str.contains("Home Internet")]


#Creating a word dictionary to determine frequency of future products 
Product_wordDict ={}
for i in range (0,len(df_laptop)):
    my_string = " "
    my_list = []
    my_string = df_laptop.iloc[i , -1]
    my_list = my_string.split(",")
#     print(my_list)
    for i in range (0 , len(my_list)):
        key = my_list[i]
        if key not in  Product_wordDict:
            Product_wordDict[key] =0
        Product_wordDict[key] = Product_wordDict[key]+1
        
        
#Sorting the dictionary in increasing order of values, where keys are future procudts           
from operator import itemgetter
type(sorted(Product_wordDict.items(), key=itemgetter(1)))

plt.subplots(figsize = (20,10))
plt.bar(range(len(Product_wordDict)), list(Product_wordDict.values()), align='center')
plt.xticks(range(len(Product_wordDict)), list(Product_wordDict.keys()) , rotation = 45 , 
          horizontalalignment="right")
plt.xlabel('Future Products')
plt.ylabel('Frequency')
Out[29]:
Text(0, 0.5, 'Frequency')

We see that , people who currently own Laptop computers and have a Home Internet Subscription , are most likely to but a Fitness Band or Smart Watch in the future

Laptop Computer + Pay Tv

In [30]:
# taking combination of product ( Laptop computer ) and subsciprtion ( Pay TV ) 
# to determine the highest frequency of future product desired
df_laptop = df_merged2.loc[df_merged2['products'].str.contains("Laptop computer")]

df_laptop =df_laptop.loc[df_laptop['subscriptions'].str.contains("Pay TV")]
df_laptop


#Creating a word dictionary to determine frequency of future products 
Product_wordDict ={}
for i in range (0,len(df_laptop)):
    my_string = " "
    my_list = []
    my_string = df_laptop.iloc[i , -1]
    my_list = my_string.split(",")
#     print(my_list)
    for i in range (0 , len(my_list)):
        key = my_list[i]
        if key not in  Product_wordDict:
            Product_wordDict[key] =0
        Product_wordDict[key] = Product_wordDict[key]+1
                 
#Sorting the dictionary in increasing order of values, where keys are future procudts    
from operator import itemgetter
sorted(Product_wordDict.items(), key=itemgetter(1))

plt.subplots(figsize = (20,10))
plt.bar(range(len(Product_wordDict)), list(Product_wordDict.values()), align='center')
plt.xticks(range(len(Product_wordDict)), list(Product_wordDict.keys()) , rotation = 45 , 
          horizontalalignment="right")
plt.xlabel('Future Products')
plt.ylabel('Frequency')
Out[30]:
Text(0, 0.5, 'Frequency')

We see that , people who currently own Laptop computers and have a Pay TV Subscription , are most likely to but a Fitness Band or Smart Watch in the future

Laptop Computer + Mobile Voice

In [31]:
# taking combination of product ( Laptop computer ) and subsciprtion ( Mobile Voice) 
# to determine the highest frequency of future product desired

df_laptop = df_merged2.loc[df_merged2['products'].str.contains("Laptop computer")]

df_laptop =df_laptop.loc[df_laptop['subscriptions'].str.contains("Mobile voice")]
df_laptop

#Creating a word dictionary to determine frequency of future products 
Product_wordDict ={}
for i in range (0,len(df_laptop)):
    my_string = " "
    my_list = []
    my_string = df_laptop.iloc[i , -1]
    my_list = my_string.split(",")
#     print(my_list)
    for i in range (0 , len(my_list)):
        key = my_list[i]
        if key not in  Product_wordDict:
            Product_wordDict[key] =0
        Product_wordDict[key] = Product_wordDict[key]+1
        
        
#Sorting the dictionary in increasing order of values, where keys are future products           
from operator import itemgetter
sorted(Product_wordDict.items(), key=itemgetter(1))

plt.subplots(figsize = (20,10))
plt.bar(range(len(Product_wordDict)), list(Product_wordDict.values()), align='center')
plt.xticks(range(len(Product_wordDict)), list(Product_wordDict.keys()) , rotation = 45 , 
          horizontalalignment="right")

plt.xlabel('Future Products')
plt.ylabel('Frequency')
Out[31]:
Text(0, 0.5, 'Frequency')

We see that , people who currently own Laptop computers and have a Mobile Voice Subscription , are most likely to but a Fitness Band or Smart Watch in the future

Smart Phone + Home internet

In [32]:
# taking combination of product ( Smartphone ) and subsciprtion ( Home Internet ) 
# to determine the highest frequency of future product desired

df_smartphone = df_merged2.loc[df_merged2['products'].str.contains("Smartphone")]

df_smartphone =df_smartphone.loc[df_smartphone['subscriptions'].str.contains("Home Internet")]
df_smartphone


#Creating a word dictionary to determine frequency of future products 
Product_wordDict ={}
for i in range (0,len(df_smartphone)):
    my_string = " "
    my_list = []
    my_string = df_smartphone.iloc[i , -1]
    my_list = my_string.split(",")
#     print(my_list)
    for i in range (0 , len(my_list)):
        key = my_list[i]
        if key not in  Product_wordDict:
            Product_wordDict[key] =0
        Product_wordDict[key] = Product_wordDict[key]+1
        
        
#Sorting the dictionary in increasing order of values, where keys are future products       
from operator import itemgetter
sorted(Product_wordDict.items(), key=itemgetter(1))

plt.subplots(figsize = (20,10))
plt.bar(range(len(Product_wordDict)), list(Product_wordDict.values()), align='center')
plt.xticks(range(len(Product_wordDict)), list(Product_wordDict.keys()) , rotation = 45 , 
          horizontalalignment="right")

plt.xlabel('Future Products')
plt.ylabel('Frequency')
Out[32]:
Text(0, 0.5, 'Frequency')

We see that , people who currently own Smartphone and have a Home Internet Subscription , are most likely to but a Fitness Band or Smart Watch in the future

Flat Panel Television + Home Internet

In [33]:
# taking combination of product ( Smartphone ) and subsciprtion ( Home Internet ) 
# to determine the highest frequency of future product desired

df_TV = df_merged2.loc[df_merged2['products'].str.contains("Flat panel television")]

df_TV =df_TV.loc[df_TV['subscriptions'].str.contains("Home Internet")]


#Creating a word dictionary to determine frequency of future products 
Product_wordDict ={}
for i in range (0,len(df_TV)):
    my_string = " "
    my_list = []
    my_string = df_TV.iloc[i , -1]
    my_list = my_string.split(",")
#     print(my_list)
    for i in range (0 , len(my_list)):
        key = my_list[i]
        if key not in  Product_wordDict:
            Product_wordDict[key] =0
        Product_wordDict[key] = Product_wordDict[key]+1
        
        
#Sorting the dictionary in increasing order of values, where keys are future products       
from operator import itemgetter
sorted(Product_wordDict.items(), key=itemgetter(1))
Out[33]:
[('Basic mobile phone', 49),
 ('Desktop computer', 65),
 ('Gaming console', 66),
 ('Computer network/router in your home for wireless computer/laptop usage',
  69),
 ('Blu-ray disc player/DVD player', 85),
 ('Over-the-air digital TV antenna ', 88),
 ('Portable video game player', 91),
 ('Laptop computer', 96),
 ('Digital video recorder', 105),
 ('Smartphone', 106),
 ('Dedicated e-book reader', 141),
 ('Drone', 155),
 ('Portable streaming thumb drive/fob', 160),
 ('Streaming media box or over-the-top box', 171),
 ('Tablet', 192),
 ('Virtual reality headset', 197),
 ('Smart watch', 276),
 ('Fitness band', 278)]

We see that , people who currently own Flat Panel Television and have a Home Internet Subscription , are most likely to but a Fitness Band or Smart Watch in the future

Product + Subscription owned by Females

In [34]:
# filter the dataframe to only records that belong to Females
df_female = df_merged2.loc[df_merged2['Gender'] == "Female"]
df_female

#Creating a word dictionary to determine frequency of future products 
Product_wordDict ={}
for i in range (0,len(df_female)):
    my_string = " "
    my_list = []
    my_string = df_female.iloc[i , -3]
    my_list = my_string.split(",")
#     print(my_list)
    for i in range (0 , len(my_list)):
        key = my_list[i]
        if key not in  Product_wordDict:
            Product_wordDict[key] =0
        Product_wordDict[key] = Product_wordDict[key]+1
        
        
#Sorting the dictionary in increasing order of values, where keys are  products       
from operator import itemgetter
sorted(Product_wordDict.items(), key=itemgetter(1))


plt.subplots(figsize = (20,10))
plt.bar(range(len(Product_wordDict)), list(Product_wordDict.values()), align='center')
plt.xticks(range(len(Product_wordDict)), list(Product_wordDict.keys()) , rotation = 45 , 
          horizontalalignment="right")

plt.xlabel('Products')
plt.ylabel('Frequency')
Out[34]:
Text(0, 0.5, 'Frequency')
In [35]:
#Creating a word dictionary to determine frequency of subscriptions
Product_wordDict ={}
for i in range (0,len(df_female)):
    my_string = " "
    my_list = []
    my_string = df_female.iloc[i , -2]
    my_list = my_string.split(",")
#     print(my_list)
    for i in range (0 , len(my_list)):
        key = my_list[i]
        if key not in  Product_wordDict:
            Product_wordDict[key] =0
        Product_wordDict[key] = Product_wordDict[key]+1
        
        
#Sorting the dictionary in increasing order of values, where keys are subscriptions       
from operator import itemgetter
sorted(Product_wordDict.items(), key=itemgetter(1))

plt.subplots(figsize = (20,10))
plt.bar(range(len(Product_wordDict)), list(Product_wordDict.values()), align='center')
plt.xticks(range(len(Product_wordDict)), list(Product_wordDict.keys()) , rotation = 45 , 
          horizontalalignment="right")

plt.xlabel('Subscriptions')
plt.ylabel('Frequency')
Out[35]:
Text(0, 0.5, 'Frequency')

Top 3 products : Laptop Computer / Smartphone / Flat panel television

Top Subscription : Home Internet / Pay TV / Mobile data plan

Product + Subscription owned by Males

In [36]:
# filter the dataframe to only records that belong to Males
df_male = df_merged2.loc[df_merged2['Gender'] == "Male"]


Product_wordDict ={}
for i in range (0,len(df_male)):
    my_string = " "
    my_list = []
    my_string = df_male.iloc[i , -3]
    my_list = my_string.split(",")
#     print(my_list)
    for i in range (0 , len(my_list)):
        key = my_list[i]
        if key not in  Product_wordDict:
            Product_wordDict[key] =0
        Product_wordDict[key] = Product_wordDict[key]+1
        
        
#Sorting the dictionary in increasing order of values, where keys are products       
from operator import itemgetter
sorted(Product_wordDict.items(), key=itemgetter(1))

plt.subplots(figsize = (20,10))
plt.bar(range(len(Product_wordDict)), list(Product_wordDict.values()), align='center')
plt.xticks(range(len(Product_wordDict)), list(Product_wordDict.keys()) , rotation = 45 , 
          horizontalalignment="right")
plt.xlabel('Products')
plt.ylabel('Frequency')
Out[36]:
Text(0, 0.5, 'Frequency')
In [37]:
#Creating a word dictionary to determine frequency of Subscriptions
Product_wordDict ={}
for i in range (0,len(df_male)):
    my_string = " "
    my_list = []
    my_string = df_male.iloc[i , -2]
    my_list = my_string.split(",")
#     print(my_list)
    for i in range (0 , len(my_list)):
        key = my_list[i]
        if key not in  Product_wordDict:
            Product_wordDict[key] =0
        Product_wordDict[key] = Product_wordDict[key]+1
        
        
#Sorting the dictionary in increasing order of values, where keys are Subscriptions      
from operator import itemgetter
sorted(Product_wordDict.items(), key=itemgetter(1))


plt.subplots(figsize = (20,10))
# ax = sns.barplot(x=range(len(Product_wordDict)), y=, data=tips)
plt.bar(range(len(Product_wordDict)), list(Product_wordDict.values()), align='center')
plt.xticks(range(len(Product_wordDict)), list(Product_wordDict.keys()) , rotation = 45 , 
          horizontalalignment="right")
plt.xlabel('Subscriptions')
plt.ylabel('Frequency')
Out[37]:
Text(0, 0.5, 'Frequency')

Top Products : Laptop Computer / Smartphone / Deskstop computer

Top Subscription : Home Internet / Pay TV / Streaming Video Service

Business Case 2 : Inclination Towards Ad-based video streaming

In [38]:
#reassigning column names for filtering purpose of data in 2009
df_10.columns=column_names_10
df_10

#part1 contains all columns having Q26-Video data
part1=df_10.filter(regex='^Q26',axis=1)
part1=part1.iloc[:,5].to_frame()

#part2 contains all columns having QNEW28 data
part2=df_10.filter(regex='^QNEW28',axis=1)
#part3 contains all columns having Q39 data
part3=df_10.filter(regex='^Q39',axis=1)


print('There are' ,part1.isna().any().sum(),' null values in part1 dataframe.')
print('There are' ,part2.isna().any().sum(),' null values in part2 dataframe.')
print('There are' ,part3.isna().any().sum(),' null values in part3 dataframe.')

#Replacing all NaN Values with No
part1.replace(to_replace ="-9999", value ="No",inplace=True)
part2.replace(to_replace ="-9999", value ="Never",inplace=True)
part3.replace(to_replace ="-9999", value ="N/A; I do not have a basis to answer",inplace=True)

# #replacing all Yes with 1 and No with 0
part1.replace(to_replace=['Yes','No'],value=[1,0],inplace=True)
part2.replace(to_replace=['Never','Rarely (one to three times a year)','Occasionally (monthly)','Frequently (every day/weekly)'],value=[1,2,2,3],inplace=True)
part3.replace(to_replace=['N/A; I do not have a basis to answer','Disagree strongly','Disagree somewhat','Agree somewhat','Agree strongly'],value=[0,0,0,1,1],inplace=True)

#part0 contains Age, Location, Ethnicity and Income Data
part00=df_10.iloc[:,:8]
part01=df_10.iloc[:,15]

task2_10=pd.concat([part00,part01,part1,part2,part3], axis=1)
task2_10["year"] = [2010 for i in range(len(task2_10))]

task2_10.head(5)
There are 0  null values in part1 dataframe.
There are 0  null values in part2 dataframe.
There are 0  null values in part3 dataframe.
Out[38]:
Q1r1 - To begin, what is your age? Q4 - What is your gender? age - you are... Q2 - In which state do you currently reside? region - Region QNEW3 - What is your employment status? Q5 - Which category best describes your ethnicity? QNEW1 - Do you have children living in your home (excluding yourself if you are under 18)? Q6 - Into which of the following categories does your total annual household income fall before taxes? Again, we promise to keep this, and all your answers, completely confidential. Q26 - Which of the following subscriptions does your household purchase?-Streaming video service ... Q39r1 - I would rather pay for news online in exchange for not being exposed to advertisements. - Using the scale below, please indicate how much you agree or disagree with the following statements. If the question does not apply to you, choose "N/A." Q39rNEW1 - I would rather pay for sports information online in exchange for not being exposed to advertisements. - Using the scale below, please indicate how much you agree or disagree with the following statements. If the question does not apply to you, c Q39rNEW2 - I would rather pay for games online in exchange for not being exposed to advertisements. - Using the scale below, please indicate how much you agree or disagree with the following statements. If the question does not apply to you, choose "N/A." Q39rNEW3 - I would rather pay for music online in exchange for not being exposed to advertisements. - Using the scale below, please indicate how much you agree or disagree with the following statements. If the question does not apply to you, choose "N/A." Q39rNEW4 - I would rather pay for TV shows online in exchange for not being exposed to advertisements. - Using the scale below, please indicate how much you agree or disagree with the following statements. If the question does not apply to you, choose "N/A Q39rNEW5 - I would rather pay for movies online in exchange for not being exposed to advertisements. - Using the scale below, please indicate how much you agree or disagree with the following statements. If the question does not apply to you, choose "N/A." Q39r2 - I would be willing to provide more personal information online if that meant I could receive advertising more targeted to my needs and interests. - Using the scale below, please indicate how much you agree or disagree with the following statements. Q39r3 - By providing more personal information online, I am worried about becoming a victim of identity theft. - Using the scale below, please indicate how much you agree or disagree with the following statements. If the question does not apply to you, cho Q39r4 - I would be willing to view advertising with my streaming video programming if it significantly reduced the cost of the subscription. - Using the scale below, please indicate how much you agree or disagree with the following statements. If the quest year
0 29 Female 24-29 New York Northeast Employed full-time or part-time White or Caucasian (Non-Hispanic) Yes $50,000 to $99,999 0 ... 1 1 1 1 1 1 1 1 1 2010
1 29 Male 24-29 California West Employed full-time or part-time White or Caucasian (Non-Hispanic) Yes $50,000 to $99,999 1 ... 1 1 1 0 1 1 1 0 1 2010
2 29 Female 24-29 Colorado West Employed full-time or part-time White or Caucasian (Non-Hispanic) No Less than $29,999 1 ... 0 0 1 1 1 1 0 1 0 2010
3 29 Female 24-29 South Carolina South Student White or Caucasian (Non-Hispanic) Yes $30,000 to $49,999 0 ... 1 1 1 0 1 1 1 1 1 2010
4 29 Male 24-29 Virginia South Employed full-time or part-time White or Caucasian (Non-Hispanic) Yes $50,000 to $99,999 0 ... 1 1 0 1 1 1 1 1 1 2010

5 rows × 21 columns

In [39]:
#reassigning column names for filtering purpose of data in 2009
df_11.columns=column_names_11

#part1 contains all columns having Q26-Video data
part1=df_11.filter(regex='^Q26',axis=1)
part1=part1.iloc[:,5].to_frame()

#part2 contains all columns having QNEW28 data
part2=df_11.filter(regex='^QNEW28',axis=1)
# part3 contains all columns having Q8 data
part3=df_11.filter(regex='^Q39',axis=1)


print('There are' ,part1.isna().any().sum(),' null values in part1 dataframe.')
print('There are' ,part2.isna().any().sum(),' null values in part2 dataframe.')
print('There are' ,part3.isna().any().sum(),' null values in part3 dataframe.')

#Replacing all NaN Values with No
part1.replace(to_replace ="-9999", value ="No",inplace=True)
part2.replace(to_replace ="-9999", value ="Never",inplace=True)
part3.replace(to_replace ="-9999", value ="N/A; I do not have a basis to answer",inplace=True)

#replacing all Yes with 1 and No with 0
part1.replace(to_replace=['Yes','No'],value=[1,0],inplace=True)
part2.replace(to_replace=['Never','Rarely (one to three times a year)','Occasionally (monthly)','Frequently (every day/weekly)'],value=[1,2,2,3],inplace=True)
part3.replace(to_replace=['N/A; I do not have a basis to answer','Disagree strongly','Disagree somewhat','Agree somewhat','Agree strongly'],value=[0,0,0,1,1],inplace=True)

#part0 contains Age, Location, Ethnicity and Income Data
part00=df_11.iloc[:,:8]
part01=df_11.iloc[:,15]

task2_11=pd.concat([part00,part01,part1,part2,part3], axis=1)
task2_11["year"] = [2011 for i in range(len(task2_11))]

task2_11.head(5)
There are 0  null values in part1 dataframe.
There are 0  null values in part2 dataframe.
There are 0  null values in part3 dataframe.
Out[39]:
Q1r1 - To begin, what is your age? Q4 - What is your gender? age - you are... Q2 - In which state do you currently reside? region - Region QNEW3 - What is your employment status? Q5 - Which category best describes your ethnicity? QNEW1 - Do you have children living in your home (excluding yourself if you are under 18)? Q6 - Into which of the following categories does your total annual household income fall before taxes? Again, we promise to keep this, and all your answers, completely confidential. Q26 - Which of the following subscriptions does your household purchase?-Streaming video service ... Q39r1 - I would rather pay for news online in exchange for not being exposed to advertisements. - Using the scale below, please indicate how much you agree or disagree with the following statements. If the question does not apply to you, choose "N/A." Q39rNEW1 - I would rather pay for sports information online in exchange for not being exposed to advertisements. - Using the scale below, please indicate how much you agree or disagree with the following statements. If the question does not apply to you, c Q39rNEW2 - I would rather pay for games online in exchange for not being exposed to advertisements. - Using the scale below, please indicate how much you agree or disagree with the following statements. If the question does not apply to you, choose "N/A." Q39rNEW3 - I would rather pay for music online in exchange for not being exposed to advertisements. - Using the scale below, please indicate how much you agree or disagree with the following statements. If the question does not apply to you, choose "N/A." Q39rNEW4 - I would rather pay for TV shows online in exchange for not being exposed to advertisements. - Using the scale below, please indicate how much you agree or disagree with the following statements. If the question does not apply to you, choose "N/A Q39rNEW5 - I would rather pay for movies online in exchange for not being exposed to advertisements. - Using the scale below, please indicate how much you agree or disagree with the following statements. If the question does not apply to you, choose "N/A." Q39r2 - I would be willing to provide more personal information online if that meant I could receive advertising more targeted to my needs and interests. - Using the scale below, please indicate how much you agree or disagree with the following statements. Q39r3 - By providing more personal information online, I am worried about becoming a victim of identity theft. - Using the scale below, please indicate how much you agree or disagree with the following statements. If the question does not apply to you, cho Q39r4 - I would be willing to view advertising with my streaming video programming if it significantly reduced the cost of the subscription.(e.g., reduced subscription cost by 25%) - Using the scale below, please indicate how much you agree or disagree wit year
0 36 Male 34-50 Georgia South Employed full-time or part-time White or Caucasian (Non-Hispanic) Yes $50,000 to $99,999 0 ... 1 1 1 1 1 1 1 1 1 2011
1 26 Female 20-26 New York Northeast Employed full-time or part-time White or Caucasian (Non-Hispanic) Yes $50,000 to $99,999 1 ... 0 1 1 0 0 0 1 0 0 2011
2 32 Female 27-33 New Jersey Northeast Employed full-time or part-time White or Caucasian (Non-Hispanic) Yes Less than $29,999 0 ... 1 1 1 1 1 1 1 1 1 2011
3 25 Female 20-26 California West Employed full-time or part-time White or Caucasian (Non-Hispanic) Yes Less than $29,999 1 ... 1 1 1 1 1 1 1 1 1 2011
4 28 Male 27-33 Indiana Midwest Student White or Caucasian (Non-Hispanic) Yes $50,000 to $99,999 1 ... 0 1 0 0 0 0 1 1 0 2011

5 rows × 21 columns

Classification of Consumer Behavious: Ad-based Video Subscription

In [40]:
#Preparing the combined dataframe for 2010 & 2011 dataset
task2_11 = task2_11.rename(columns={"Q39r4 - I would be willing to view advertising with my streaming video programming if it significantly reduced the cost of the subscription.(e.g., reduced subscription cost by 25%) - Using the scale below, please indicate how much you agree or disagree wit": "Q39r4 - I would be willing to view advertising with my streaming video programming if it significantly reduced the cost of the subscription. - Using the scale below, please indicate how much you agree or disagree with the following statements. If the quest"})
task2_11 = task2_11.rename(columns={"QNEW28 - How frequently do you use a friend or family member's (someone not living in your household) subscription login information  to watch digital content?" : "QNEW28 - How frequently do you use a friend or family member's (someone not living in your household) subscription login information to watch digital content?"})
df_task2 = task2_10.append(task2_11, ignore_index = True)

#Selecting the necessary columns for the 
df_task2_1 = df_task2.iloc[:, [1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,-2]]

#Definig feature & target dataframes
X = df_task2_1.iloc[:, :-1]
Y = df_task2_1.iloc[:,-1]
In [41]:
#Plotting the Distribtuion of target variable
plt.subplots(figsize=(12,12))
sns.countplot(x='Q39r4 - I would be willing to view advertising with my streaming video programming if it significantly reduced the cost of the subscription. - Using the scale below, please indicate how much you agree or disagree with the following statements. If the quest'
              ,data=df_task2_1)
plt.title("Distribution of Target Variable")
plt.grid()
plt.show()
In [42]:
# Categorical boolean mask
categorical_feature_mask = X.dtypes == object

# filter categorical columns using mask and turn it into a list
categorical_cols = X.columns[categorical_feature_mask].tolist()

# instantiate labelencoder object
le = LabelEncoder()

# apply le on categorical feature columns
X[categorical_cols] = X[categorical_cols].apply(lambda col: le.fit_transform(col))

X.head(10)
Out[42]:
Q4 - What is your gender? age - you are... region - Region QNEW3 - What is your employment status? Q5 - Which category best describes your ethnicity? QNEW1 - Do you have children living in your home (excluding yourself if you are under 18)? Q6 - Into which of the following categories does your total annual household income fall before taxes? Again, we promise to keep this, and all your answers, completely confidential. Q26 - Which of the following subscriptions does your household purchase?-Streaming video service QNEW28 - How frequently do you use a friend or family member's (someone not living in your household) subscription login information to watch digital content? Q39r1 - I would rather pay for news online in exchange for not being exposed to advertisements. - Using the scale below, please indicate how much you agree or disagree with the following statements. If the question does not apply to you, choose "N/A." Q39rNEW1 - I would rather pay for sports information online in exchange for not being exposed to advertisements. - Using the scale below, please indicate how much you agree or disagree with the following statements. If the question does not apply to you, c Q39rNEW2 - I would rather pay for games online in exchange for not being exposed to advertisements. - Using the scale below, please indicate how much you agree or disagree with the following statements. If the question does not apply to you, choose "N/A." Q39rNEW3 - I would rather pay for music online in exchange for not being exposed to advertisements. - Using the scale below, please indicate how much you agree or disagree with the following statements. If the question does not apply to you, choose "N/A." Q39rNEW4 - I would rather pay for TV shows online in exchange for not being exposed to advertisements. - Using the scale below, please indicate how much you agree or disagree with the following statements. If the question does not apply to you, choose "N/A Q39rNEW5 - I would rather pay for movies online in exchange for not being exposed to advertisements. - Using the scale below, please indicate how much you agree or disagree with the following statements. If the question does not apply to you, choose "N/A." Q39r2 - I would be willing to provide more personal information online if that meant I could receive advertising more targeted to my needs and interests. - Using the scale below, please indicate how much you agree or disagree with the following statements.
0 0 3 1 0 9 1 2 0 3 1 1 1 1 1 1 1
1 1 3 3 0 9 1 2 1 2 1 1 1 0 1 1 1
2 0 3 3 0 9 0 3 1 3 0 0 1 1 1 1 0
3 0 3 2 3 9 1 1 0 3 1 1 1 0 1 1 1
4 1 3 2 0 9 1 2 0 3 1 1 0 1 1 1 1
5 1 3 1 4 5 0 2 1 2 0 1 1 1 0 1 1
6 0 3 0 0 9 0 3 1 1 0 0 0 0 0 0 0
7 0 3 1 0 9 0 2 1 1 0 0 0 0 0 0 0
8 1 3 2 3 5 0 3 1 3 0 0 0 1 1 1 0
9 0 3 1 0 0 1 0 1 3 1 1 1 1 1 1 1
In [43]:
#Refernce : https://towardsdatascience.com/encoding-categorical-features-21a2651a065c

# create training and testing vars
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.25, random_state = 1, shuffle = True)

# Create the classifier:
classifier = RandomForestClassifier(n_estimators = 100,  random_state = 42)

# Fit the classifier to the training data
classifier.fit(np.array(X_train), np.array(Y_train))

# Predict on the test data: y_pred
Y_pred = classifier.predict(np.array(X_test))
Y_pred_prob = classifier.predict_proba(np.array(X_test))
pred_proba = [np.round((Y_pred_prob[i][1]),4) for i in range(len(Y_pred_prob))]

print("The Accuracy of model is ")
print(np.round(accuracy_score(Y_test,Y_pred),4))

#Calculating metrics to plot an ROC curve
fpr_rfc, tpr_rfc, threshold1  = roc_curve(Y_test, pred_proba)

pd.crosstab(Y_test, Y_pred)
The Accuracy of model is 
0.7125
Out[43]:
col_0 0 1
Q39r4 - I would be willing to view advertising with my streaming video programming if it significantly reduced the cost of the subscription. - Using the scale below, please indicate how much you agree or disagree with the following statements. If the quest
0 312 159
1 132 409
In [44]:
# Create the classifier:
svc = SVC(probability = True)  
# Fit the classifier to the training data
svc.fit(X_train, Y_train)
    
# Predict probability on the test data: y_pred
prob_pred = svc.predict_proba(X_test)

# Predict on the test data: y_pred
Y_pred = svc.predict(np.array(X_test))
Y_pred_prob = svc.predict_proba(np.array(X_test))
pred_proba = [np.round((Y_pred_prob[i][1]),4) for i in range(len(Y_pred_prob))]

print("The Accuracy of Support Vector Machine Model is ")
print(np.round(accuracy_score(Y_test,Y_pred),4))

#Calculating metrics to plot an ROC curve
fpr_svc, tpr_svc, threshold2  = roc_curve(Y_test, pred_proba)

pd.crosstab(Y_test, Y_pred)
The Accuracy of Support Vector Machine Model is 
0.7223
Out[44]:
col_0 0 1
Q39r4 - I would be willing to view advertising with my streaming video programming if it significantly reduced the cost of the subscription. - Using the scale below, please indicate how much you agree or disagree with the following statements. If the quest
0 332 139
1 142 399
In [45]:
#Plotting an RO graph to using false postive and true postive rates to determine the model performence
#Refernece: https://www.dezyre.com/recipes/plot-roc-curve-in-python

plt.figure(figsize=(10,8))
plt.plot(fpr_rfc, tpr_rfc, label ='RandomForestClassifier', linestyle='-')
plt.plot(fpr_svc, tpr_svc, label ='SupportVectorMachine', linestyle='-')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')
plt.legend(loc="lower right")
plt.show()

Business Case 3: Hyper Ad-targeting using customer segmentation

In [46]:
#Selecting the dataframe for the task
df_task2_2 = df_task2.iloc[ : ,[1,2,3,5,8,11,12,13,14,15,16]]
df_task2_2 = df_task2_2.set_index(['Q4 - What is your gender?', 'age - you are...','Q2 - In which state do you currently reside?',
 'QNEW3 - What is your employment status?',
 'Q6 - Into which of the following categories does your total annual household income fall before taxes? Again, we promise to keep this, and all your answers, completely confidential.'])

df_task2_2.head()
Out[46]:
Q39r1 - I would rather pay for news online in exchange for not being exposed to advertisements. - Using the scale below, please indicate how much you agree or disagree with the following statements. If the question does not apply to you, choose "N/A." Q39rNEW1 - I would rather pay for sports information online in exchange for not being exposed to advertisements. - Using the scale below, please indicate how much you agree or disagree with the following statements. If the question does not apply to you, c Q39rNEW2 - I would rather pay for games online in exchange for not being exposed to advertisements. - Using the scale below, please indicate how much you agree or disagree with the following statements. If the question does not apply to you, choose "N/A." Q39rNEW3 - I would rather pay for music online in exchange for not being exposed to advertisements. - Using the scale below, please indicate how much you agree or disagree with the following statements. If the question does not apply to you, choose "N/A." Q39rNEW4 - I would rather pay for TV shows online in exchange for not being exposed to advertisements. - Using the scale below, please indicate how much you agree or disagree with the following statements. If the question does not apply to you, choose "N/A Q39rNEW5 - I would rather pay for movies online in exchange for not being exposed to advertisements. - Using the scale below, please indicate how much you agree or disagree with the following statements. If the question does not apply to you, choose "N/A."
Q4 - What is your gender? age - you are... Q2 - In which state do you currently reside? QNEW3 - What is your employment status? Q6 - Into which of the following categories does your total annual household income fall before taxes? Again, we promise to keep this, and all your answers, completely confidential.
Female 24-29 New York Employed full-time or part-time $50,000 to $99,999 1 1 1 1 1 1
Male 24-29 California Employed full-time or part-time $50,000 to $99,999 1 1 1 0 1 1
Female 24-29 Colorado Employed full-time or part-time Less than $29,999 0 0 1 1 1 1
South Carolina Student $30,000 to $49,999 1 1 1 0 1 1
Male 24-29 Virginia Employed full-time or part-time $50,000 to $99,999 1 1 0 1 1 1
In [47]:
#Determining Value K for Kmeans Clustering using elbow method 
#Referece: https://pythonprogramminglanguage.com/kmeans-elbow-method/

distortions = []
K = range(1,10)

for k in K:
    kmeanModel = KMeans(n_clusters=k, random_state = 0).fit(df_task2_2)
    kmeanModel.fit(df_task2_2)
    distortions.append(sum(np.min(cdist(df_task2_2, kmeanModel.cluster_centers_, 'euclidean'), axis=1)) / df_task2_2.shape[0])

# Plot the elbow
plt.plot(K, distortions, 'bx-')
plt.xlabel('Value of K')
plt.ylabel('Distortion')
plt.title('The Elbow Method to select the optimal K')
plt.show()
In [48]:
#Development of KMode clustering model
clustModel = KModes(n_clusters=3, init='Huang', n_init=5, verbose=1)

clust_labels = clustModel.fit_predict(df_task2_2)

#Plotting the cluster centers
cluster_centers = clustModel.cluster_centroids_
Init: initializing centroids
Init: initializing clusters
Starting iterations...
Run 1, iteration: 1/100, moves: 662, cost: 2790.0
Run 1, iteration: 2/100, moves: 278, cost: 2790.0
Init: initializing centroids
Init: initializing clusters
Starting iterations...
Run 2, iteration: 1/100, moves: 836, cost: 2959.0
Init: initializing centroids
Init: initializing clusters
Starting iterations...
Run 3, iteration: 1/100, moves: 446, cost: 2683.0
Init: initializing centroids
Init: initializing clusters
Starting iterations...
Run 4, iteration: 1/100, moves: 1034, cost: 2826.0
Run 4, iteration: 2/100, moves: 205, cost: 2813.0
Run 4, iteration: 3/100, moves: 5, cost: 2813.0
Init: initializing centroids
Init: initializing clusters
Starting iterations...
Run 5, iteration: 1/100, moves: 442, cost: 3159.0
Best run was number 3
In [49]:
#Development of KMeans clustering model

clustModel = KMeans(n_clusters = 3, random_state = 10)
clustModel.fit(df_task2_2)
clust_labels = clustModel.predict(df_task2_2)

#Assinging cluster label for each shop 
df_task2_2['clust_labels'] = clust_labels

#Plotting the cluster centers
cluster_centers=clustModel.cluster_centers_

x = cluster_centers[:, 0]
y = cluster_centers[:, 1]
plt.scatter(x, y, s=75)
plt.title("Scatter Plot indicating Clusters")
plt.grid()
plt.show()
In [50]:
#Plotting group behaviour of users in a specific cluster
clust1 = df_task2_2[df_task2_2['clust_labels'] == 2].reset_index()
clust1_col = clust1.columns.to_list()
df_clust1_grp = clust1.groupby(clust1_col[5:11])['clust_labels'].sum()
df_clust1_grp = pd.DataFrame(df_clust1_grp)
df_clust1_grp = df_clust1_grp.reset_index()
df_clust1_grp.sort_values('clust_labels', ascending=False).head(10)
Out[50]:
Q39r1 - I would rather pay for news online in exchange for not being exposed to advertisements. - Using the scale below, please indicate how much you agree or disagree with the following statements. If the question does not apply to you, choose "N/A." Q39rNEW1 - I would rather pay for sports information online in exchange for not being exposed to advertisements. - Using the scale below, please indicate how much you agree or disagree with the following statements. If the question does not apply to you, c Q39rNEW2 - I would rather pay for games online in exchange for not being exposed to advertisements. - Using the scale below, please indicate how much you agree or disagree with the following statements. If the question does not apply to you, choose "N/A." Q39rNEW3 - I would rather pay for music online in exchange for not being exposed to advertisements. - Using the scale below, please indicate how much you agree or disagree with the following statements. If the question does not apply to you, choose "N/A." Q39rNEW4 - I would rather pay for TV shows online in exchange for not being exposed to advertisements. - Using the scale below, please indicate how much you agree or disagree with the following statements. If the question does not apply to you, choose "N/A Q39rNEW5 - I would rather pay for movies online in exchange for not being exposed to advertisements. - Using the scale below, please indicate how much you agree or disagree with the following statements. If the question does not apply to you, choose "N/A." clust_labels
0 0 0 0 0 1 1 246
9 0 0 1 1 1 1 142
3 0 0 0 1 1 1 140
1 0 0 0 1 0 1 94
2 0 0 0 1 1 0 80
24 1 0 0 1 1 1 80
21 1 0 0 0 1 1 68
6 0 0 1 0 1 1 68
15 0 1 0 1 1 1 66
30 1 1 0 0 1 1 58
In [51]:
#Plttong the Distribution of Employment Status in Cluster 1
plt.subplots(figsize=(10,8))

sns.countplot(x='QNEW3 - What is your employment status?', data=clust1[['QNEW3 - What is your employment status?']])
plt.xlabel('Employment Status')
plt.ylabel("Frequency")
plt.title("Distribution of Employment Status in Cluster 1")
plt.grid()
plt.show()
In [52]:
#Plotting the Location of Users in Cluster 1

plt.subplots(figsize=(12,10))

sns.countplot(y='Q2 - In which state do you currently reside?', data=clust1[['Q2 - In which state do you currently reside?']])
plt.xlabel('Frequency')
plt.ylabel("State")
plt.title("Location of Users in Cluster 1")
plt.show()
In [53]:
#Plotting the salary of Users in Cluster 1

plt.subplots(figsize=(12,10))

sns.countplot(y='Q6 - Into which of the following categories does your total annual household income fall before taxes? Again, we promise to keep this, and all your answers, completely confidential.', data=clust1[['Q6 - Into which of the following categories does your total annual household income fall before taxes? Again, we promise to keep this, and all your answers, completely confidential.']])
plt.xlabel('Frequency')
plt.ylabel("Salary Distribution")
plt.title("Salaary of Users in Cluster 1")
plt.show()